diff --git a/shared/tree-sitter-extractor/src/extractor/mod.rs b/shared/tree-sitter-extractor/src/extractor/mod.rs index 436ff9f65a15..b066fbc85b30 100644 --- a/shared/tree-sitter-extractor/src/extractor/mod.rs +++ b/shared/tree-sitter-extractor/src/extractor/mod.rs @@ -280,10 +280,11 @@ pub fn location_label(writer: &mut trap::Writer, location: trap::Location) -> tr } /// Extracts the source file at `path`, which is assumed to be canonicalized. -/// When `yeast_runner` is `Some`, the parsed tree is first transformed -/// through the supplied yeast `Runner` before TRAP extraction. Building the -/// `Runner` (which parses YAML and constructs the schema) is the caller's -/// responsibility, allowing it to be done once and shared across files. +/// When `desugarer` is `Some`, the parsed tree is first transformed +/// through the supplied yeast desugarer before TRAP extraction. Building +/// the desugarer (which parses YAML and constructs the schema) is the +/// caller's responsibility, allowing it to be done once and shared across +/// files. 
#[allow(clippy::too_many_arguments)] pub fn extract( language: &Language, @@ -295,7 +296,7 @@ pub fn extract( path: &Path, source: &[u8], ranges: &[Range], - yeast_runner: Option<&yeast::Runner<'_>>, + desugarer: Option<&dyn yeast::Desugarer>, ) { let path_str = file_paths::normalize_and_transform_path(path, transformer); let source_root = std::env::current_dir() @@ -328,8 +329,8 @@ pub fn extract( schema, ); - if let Some(yeast_runner) = yeast_runner { - let ast = yeast_runner + if let Some(desugarer) = desugarer { + let ast = desugarer .run_from_tree(&tree, source) .unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}")); traverse_yeast(&ast, &mut visitor); diff --git a/shared/tree-sitter-extractor/src/extractor/simple.rs b/shared/tree-sitter-extractor/src/extractor/simple.rs index 6fcd29b03443..9ba6f21778cf 100644 --- a/shared/tree-sitter-extractor/src/extractor/simple.rs +++ b/shared/tree-sitter-extractor/src/extractor/simple.rs @@ -13,11 +13,14 @@ pub struct LanguageSpec { pub prefix: &'static str, pub ts_language: tree_sitter::Language, pub node_types: &'static str, - /// Optional yeast desugaring configuration. When set, the parsed - /// tree is rewritten through yeast before TRAP extraction. The - /// config's `output_node_types_yaml` (if set) provides the schema - /// used both at runtime (for the rewriter) and for TRAP validation. - pub desugar: Option, + /// Optional desugarer. When set, the parsed tree is rewritten through + /// the desugarer before TRAP extraction. The desugarer's + /// `output_node_types_yaml()` (if set) provides the schema used both + /// at runtime (for the rewriter) and for TRAP validation. + /// + /// `Box` so the shared extractor is agnostic to + /// the user-defined context type the desugarer uses internally. 
+ pub desugar: Option>, pub file_globs: Vec, } @@ -91,35 +94,22 @@ impl Extractor { .collect(); let mut schemas = vec![]; - let mut yeast_runners = Vec::new(); for lang in &self.languages { - let effective_node_types: String = - match lang.desugar.as_ref().and_then(|c| c.output_node_types_yaml) { - Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| { - std::io::Error::other(format!( - "Failed to convert YAML node-types to JSON for {}: {e}", - lang.prefix - )) - })?, - None => lang.node_types.to_string(), - }; - let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?; - schemas.push(schema); - - // Build the yeast runner once per language so the YAML schema - // isn't re-parsed for every file. - let yeast_runner = lang + let effective_node_types: String = match lang .desugar .as_ref() - .map(|config| yeast::Runner::from_config(lang.ts_language.clone(), config)) - .transpose() - .map_err(|e| { + .and_then(|d| d.output_node_types_yaml()) + { + Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| { std::io::Error::other(format!( - "Failed to build desugaring runner for {}: {e}", + "Failed to convert YAML node-types to JSON for {}: {e}", lang.prefix )) - })?; - yeast_runners.push(yeast_runner); + })?, + None => lang.node_types.to_string(), + }; + let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?; + schemas.push(schema); } // Construct a single globset containing all language globs, @@ -194,7 +184,7 @@ impl Extractor { &path, &source, &[], - yeast_runners[i].as_ref(), + lang.desugar.as_deref(), ); std::fs::create_dir_all(src_archive_file.parent().unwrap())?; std::fs::copy(&path, &src_archive_file)?; diff --git a/shared/yeast-macros/src/lib.rs b/shared/yeast-macros/src/lib.rs index 07077be51f04..7153cf306443 100644 --- a/shared/yeast-macros/src/lib.rs +++ b/shared/yeast-macros/src/lib.rs @@ -121,3 +121,37 @@ pub fn rule(input: TokenStream) -> TokenStream { Err(err) => 
err.to_compile_error().into(), } } + +/// Define a desugaring rule whose transform is a hand-written Rust block. +/// +/// Use `manual_rule!` when the transform needs control over capture +/// translation timing — for example, when an outer rule needs to set +/// state in `ctx` (the `BuildCtx`'s user context) before recursive +/// translation reaches inner rules that read that state. +/// +/// ```text +/// manual_rule!( +/// (query_pattern field: (_) @name) +/// { +/// // `ctx` is a mutable `BuildCtx<'_, C>` binding; capture variables +/// // (`name: NodeRef`, etc.) are bound from the query. +/// let translated = ctx.translate(name)?; +/// Ok(translated) +/// } +/// ) +/// ``` +/// +/// Differences from [`rule!`]: +/// - Captures are **not** auto-translated before the body runs; they +/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or +/// [`BuildCtx::translate_opt`]) to translate them when you choose. +/// - The body is plain Rust evaluating to `Result<Vec<Id>, String>`; there is +/// no tree template and the macro adds no implicit `Ok(...)` wrap. 
+#[proc_macro] +pub fn manual_rule(input: TokenStream) -> TokenStream { + let input2: TokenStream2 = input.into(); + match parse::parse_manual_rule_top(input2) { + Ok(output) => output.into(), + Err(err) => err.to_compile_error().into(), + } +} diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index 4b27b9804392..fc6031eb39d2 100644 --- a/shared/yeast-macros/src/parse.rs +++ b/shared/yeast-macros/src/parse.rs @@ -121,9 +121,9 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result> { std::collections::HashMap::new(); let mut bare_children: Vec = Vec::new(); let push_field_elem = |order: &mut Vec, - map: &mut std::collections::HashMap>, - name: String, - elem: TokenStream| { + map: &mut std::collections::HashMap>, + name: String, + elem: TokenStream| { if !map.contains_key(&name) { order.push(name.clone()); map.insert(name, vec![elem]); @@ -160,8 +160,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result> { } else { let child = if peek_is_at(tokens) { tokens.next(); - let capture_name = - expect_ident(tokens, "expected capture name after @")?; + let capture_name = expect_ident(tokens, "expected capture name after @")?; let name_str = capture_name.to_string(); quote! { yeast::query::QueryNode::Capture { @@ -296,10 +295,10 @@ fn parse_query_list(tokens: &mut Tokens) -> Result> { // tree! / trees! parsing — direct code generation against BuildCtx // --------------------------------------------------------------------------- -const IMPLICIT_CTX: &str = "__yeast_ctx"; +const IMPLICIT_CTX: &str = "ctx"; /// Determine the context identifier: either explicit `ctx,` or the implicit -/// `__yeast_ctx` from an enclosing `rule!`. +/// `ctx` from an enclosing `rule!`. 
fn parse_ctx_or_implicit(tokens: &mut Tokens) -> Ident { // Check if first token is an ident followed by a comma let mut lookahead = tokens.clone(); @@ -359,7 +358,7 @@ fn parse_direct_node(tokens: &mut Tokens, ctx: &Ident) -> Result { Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => { let group = expect_group(tokens, Delimiter::Brace)?; let expr = group.stream(); - Ok(quote! { ::std::convert::Into::::into(#expr) }) + Ok(quote! { ::std::convert::Into::::into({ #expr }) }) } Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis => { let group = expect_group(tokens, Delimiter::Parenthesis)?; @@ -396,7 +395,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result Result Result Result::into) + { #expr }.into_iter().map(::std::convert::Into::::into) } } else { let expr = group.stream(); - quote! { (#expr).into_iter() } + quote! { { #expr }.into_iter() } }; let chained = parse_chain_suffix(tokens, ctx, base)?; stmts.push(quote! { @@ -506,11 +510,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result Result { +fn parse_chain_suffix(tokens: &mut Tokens, ctx: &Ident, base: TokenStream) -> Result { let mut current = base; while matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.') { tokens.next(); // consume . @@ -608,7 +608,8 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result Result::into) + { #expr }.into_iter().map(::std::convert::Into::::into) } } else { let expr = group.stream(); - quote! { (#expr).into_iter() } + quote! { { #expr }.into_iter() } }; let chained = parse_chain_suffix(tokens, ctx, base)?; items.push(quote! { @@ -630,7 +631,7 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result::into(#expr)); + __nodes.push(::std::convert::Into::::into({ #expr })); }); } continue; @@ -888,10 +889,117 @@ pub fn parse_rule_top(input: TokenStream) -> Result { Ok(quote! 
{ { let __query = #query_code; - yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option| { + yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| { + // Auto-translation prefix: recursively translate every + // captured node before invoking the user's transform body. + // For OneShot rules this preserves the legacy behaviour + // (input-schema captures translated to output-schema + // nodes); for Repeating rules it is a no-op. + __translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?; + #(#bindings)* + let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator); + let __result: Vec = { #transform_body }; + Ok(__result) + })) + } + }) +} + +/// Parse `manual_rule!( query { body } )`. +/// +/// Like [`parse_rule_top`] but: +/// - Expects a Rust block `{ ... }` after the query (no `=>` arrow). +/// - Generates code that does NOT auto-translate captures before +/// running the body. Capture variables refer to raw (input-schema) +/// nodes; the body is responsible for explicit translation via +/// `ctx.translate(...)`. +/// - The body is included verbatim and must evaluate to +/// `Result, String>`. +pub fn parse_manual_rule_top(input: TokenStream) -> Result { + let mut tokens = input.into_iter().peekable(); + + // Collect query tokens up to the body block `{ ... }`. + let mut query_tokens = Vec::new(); + loop { + match tokens.peek() { + None => { + return Err(syn::Error::new( + Span::call_site(), + "expected a Rust block `{ ... 
}` after the query in manual_rule!", + )) + } + Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => break, + _ => { + query_tokens.push(tokens.next().unwrap()); + } + } + } + + let query_stream: TokenStream = query_tokens.into_iter().collect(); + + // Extract captures from the query (same as in `rule!`). + let captures = extract_captures(&query_stream); + + // Parse the query into the QueryNode-building expression. + let query_code = parse_query_top(query_stream)?; + + // Generate capture bindings (same as in `rule!`). + let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site()); + let bindings: Vec = captures + .iter() + .map(|cap| { + let name = Ident::new(&cap.name, Span::call_site()); + let name_str = &cap.name; + match cap.multiplicity { + CaptureMultiplicity::Repeated => quote! { + let #name: Vec = __captures.get_all(#name_str) + .into_iter() + .map(yeast::NodeRef) + .collect(); + }, + CaptureMultiplicity::Optional => quote! { + let #name: Option = + __captures.get_opt(#name_str).map(yeast::NodeRef); + }, + CaptureMultiplicity::Single => quote! { + let #name: yeast::NodeRef = + yeast::NodeRef(__captures.get_var(#name_str).unwrap()); + }, + } + }) + .collect(); + + // Consume the body block. + let body_group = match tokens.next() { + Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => g, + other => { + return Err(syn::Error::new( + Span::call_site(), + format!( + "expected a Rust block `{{ ... }}` after the query in manual_rule!, found: {other:?}" + ), + )) + } + }; + let body_stream = body_group.stream(); + + // No tokens should follow the body. + if let Some(tok) = tokens.next() { + return Err(syn::Error::new_spanned( + tok, + "unexpected token after manual_rule! body", + )); + } + + Ok(quote! 
{ + { + let __query = #query_code; + yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| { + // No auto-translate prefix for manual rules — the body + // is responsible for translating captures explicitly. #(#bindings)* - let mut #ctx_ident = yeast::build::BuildCtx::with_source_range(__ast, &__captures, __fresh, __source_range); - #transform_body + let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator); + #body_stream })) } }) diff --git a/shared/yeast/doc/yeast.md b/shared/yeast/doc/yeast.md index 823bf1c19425..1700029b43c0 100644 --- a/shared/yeast/doc/yeast.md +++ b/shared/yeast/doc/yeast.md @@ -265,7 +265,21 @@ occurrences of the same `$name` within one `BuildCtx` share the same value: ) ``` -`{..expr}` splices a `Vec` (or any iterable of `Id`): +The contents of `{…}` are treated as a Rust block, so multi-statement +expressions (with `let` bindings) work too: + +```rust +(assignment + left: {tmp} + right: { + let lit = ctx.literal("integer", "0"); + tree!((binary_expr op: (operator "+") left: {tmp} right: {lit})) + }) +``` + +`{..expr}` splices a `Vec` (or any iterable of `Id`); the contents +are likewise a Rust block, so the splice can be the result of arbitrary +computation: ```rust yeast::trees!(ctx, diff --git a/shared/yeast/src/bin/main.rs b/shared/yeast/src/bin/main.rs index 975c8e8b25f5..978be21cc003 100644 --- a/shared/yeast/src/bin/main.rs +++ b/shared/yeast/src/bin/main.rs @@ -20,7 +20,7 @@ fn main() { let args = Cli::parse(); let language = get_language(&args.language); let source = std::fs::read_to_string(&args.file).unwrap(); - let runner = yeast::Runner::new(language, &[]); + let runner: yeast::Runner = yeast::Runner::new(language, &[]); let ast = runner.run(&source).unwrap(); println!("{}", 
ast.print(&source, ast.get_root())); } diff --git a/shared/yeast/src/build.rs b/shared/yeast/src/build.rs index d0f1394ca6d9..c7c605305994 100644 --- a/shared/yeast/src/build.rs +++ b/shared/yeast/src/build.rs @@ -2,28 +2,60 @@ use std::collections::BTreeMap; use crate::captures::Captures; use crate::tree_builder::FreshScope; -use crate::{Ast, FieldId, Id, NodeContent}; +use crate::{Ast, FieldId, Id, NodeContent, TranslatorHandle}; /// Context for building new AST nodes during a transformation. /// /// Used by the `tree!` and `trees!` macros. Holds a mutable reference to the -/// AST, a reference to the captures from a query match, and a `FreshScope` for -/// generating unique identifiers. -pub struct BuildCtx<'a> { +/// AST, a reference to the captures from a query match, a `FreshScope` for +/// generating unique identifiers, and a mutable reference to a user-defined +/// context of type `C`. +/// +/// The user context `C` is shared across rules via the framework's driver: +/// outer rules can write to it before recursive translation, and inner rules +/// can read (or further mutate) it during their transforms. The framework +/// snapshots and restores the user context around each rule application, so +/// mutations made by a rule are visible to its descendants (via recursive +/// translation) but not to its parent's siblings. +/// +/// `BuildCtx` implements [`Deref`] and [`DerefMut`] targeting `C`, so user +/// context fields are accessible as `ctx.my_field` directly (provided they +/// don't collide with `BuildCtx`'s own fields like `ast`, `captures`, etc.). +/// +/// The default `C = ()` means rules that don't need any user context don't +/// pay any cost. +/// +/// When constructed by the framework (via the rule! macro), `BuildCtx` also +/// carries a [`TranslatorHandle`] that the [`translate`] method delegates +/// to. When constructed by hand (e.g. in tests), the translator is `None` +/// and [`translate`] returns an error. 
+pub struct BuildCtx<'a, C: 'a = ()> { pub ast: &'a mut Ast, pub captures: &'a Captures, pub fresh: &'a FreshScope, /// Source range of the matched node, inherited by synthetic nodes. pub source_range: Option, + /// User-supplied context, accessible directly via `ctx.field` (via Deref). + pub user_ctx: &'a mut C, + /// Optional translator handle, populated when the context is built by + /// the framework's rule driver. None when the context is built by hand. + pub(crate) translator: Option>, } -impl<'a> BuildCtx<'a> { - pub fn new(ast: &'a mut Ast, captures: &'a Captures, fresh: &'a FreshScope) -> Self { +impl<'a, C> BuildCtx<'a, C> { + pub fn new( + ast: &'a mut Ast, + captures: &'a Captures, + fresh: &'a FreshScope, + user_ctx: &'a mut C, + ) -> Self { Self { ast, captures, fresh, source_range: None, + user_ctx, + translator: None, } } @@ -32,12 +64,35 @@ impl<'a> BuildCtx<'a> { captures: &'a Captures, fresh: &'a FreshScope, source_range: Option, + user_ctx: &'a mut C, + ) -> Self { + Self { + ast, + captures, + fresh, + source_range, + user_ctx, + translator: None, + } + } + + /// Construct a `BuildCtx` carrying a translator handle. Used by the + /// `rule!` macro to enable [`translate`] inside rule transforms. + pub fn with_translator( + ast: &'a mut Ast, + captures: &'a Captures, + fresh: &'a FreshScope, + source_range: Option, + user_ctx: &'a mut C, + translator: TranslatorHandle<'a, C>, ) -> Self { Self { ast, captures, fresh, source_range, + user_ctx, + translator: Some(translator), } } @@ -113,3 +168,52 @@ impl<'a> BuildCtx<'a> { self.ast.prepend_field_child(node_id, field_id, value_id); } } + +impl BuildCtx<'_, C> { + /// Recursively translate a node via the framework's rule machinery. + /// In a OneShot phase, applies OneShot rules to the given node and + /// returns the resulting node ids. In a Repeating phase, errors + /// (translation is not meaningful when input and output share a + /// schema). 
+ /// + /// Accepts any value convertible to [`Id`] (including [`crate::NodeRef`]), + /// so manual rules can pass capture bindings directly without unwrapping. + /// + /// Errors if this `BuildCtx` was constructed by hand (without a + /// translator handle) — for example, in unit tests that don't go + /// through the rule driver. + pub fn translate>(&mut self, id: I) -> Result, String> { + let id = id.into(); + match &self.translator { + Some(t) => t.translate(self.ast, self.user_ctx, id), + None => Err("translate() called on a BuildCtx without a translator handle".into()), + } + } + + /// Translate an optional capture, returning the first translated id or + /// `None`. Convenience for `?`-quantifier captures (`Option`). + /// + /// If the underlying translation produces multiple ids for a single + /// input, only the first is returned. For most use cases (e.g. + /// translating a single type annotation) this is what you want; if + /// you need all ids, use [`translate`] directly. + pub fn translate_opt>(&mut self, id: Option) -> Result, String> { + match id { + Some(id) => Ok(self.translate(id)?.into_iter().next()), + None => Ok(None), + } + } +} + +impl std::ops::Deref for BuildCtx<'_, C> { + type Target = C; + fn deref(&self) -> &C { + &*self.user_ctx + } +} + +impl std::ops::DerefMut for BuildCtx<'_, C> { + fn deref_mut(&mut self) -> &mut C { + &mut *self.user_ctx + } +} diff --git a/shared/yeast/src/dump.rs b/shared/yeast/src/dump.rs index d046c192053d..be496d40bd5b 100644 --- a/shared/yeast/src/dump.rs +++ b/shared/yeast/src/dump.rs @@ -53,12 +53,7 @@ pub fn dump_ast_with_options( /// /// Any node that does not match the expected type set for its parent field is /// rendered with a trailing `" <-- ERROR: ..."` annotation on the same line. 
-pub fn dump_ast_with_type_errors( - ast: &Ast, - root: usize, - source: &str, - schema: &Schema, -) -> String { +pub fn dump_ast_with_type_errors(ast: &Ast, root: usize, source: &str, schema: &Schema) -> String { dump_ast_with_type_errors_and_options(ast, root, source, schema, &DumpOptions::default()) } @@ -74,7 +69,15 @@ pub fn dump_ast_with_type_errors_and_options( options: &DumpOptions, ) -> String { let mut out = String::new(); - dump_node(ast, root, source, options, 0, Some((schema, None, None)), &mut out); + dump_node( + ast, + root, + source, + options, + 0, + Some((schema, None, None)), + &mut out, + ); out } @@ -232,8 +235,8 @@ fn dump_node( } let field_name = ast.field_name_for_id(field_id).unwrap_or("?"); let child_type_check = type_check.map(|(schema, _, _)| { - let expected = expected_for_field(schema, node.kind_name(), field_id) - .or(Some(EMPTY_NODE_TYPES)); + let expected = + expected_for_field(schema, node.kind_name(), field_id).or(Some(EMPTY_NODE_TYPES)); let parent_field = Some((node.kind_name(), field_name)); (schema, expected, parent_field) }); diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index 9c3a4ad41141..e0fffc551f34 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -16,7 +16,7 @@ pub mod schema; pub mod tree_builder; mod visitor; -pub use yeast_macros::{query, rule, tree, trees}; +pub use yeast_macros::{manual_rule, query, rule, tree, trees}; use captures::Captures; pub use cursor::Cursor; @@ -297,7 +297,9 @@ impl Ast { /// Returns the source text for `id`, resolving `NodeContent::Range` /// against the stored source bytes when available. 
pub fn source_text(&self, id: Id) -> String { - let Some(node) = self.get_node(id) else { return String::new(); }; + let Some(node) = self.get_node(id) else { + return String::new(); + }; let read_range = |range: &tree_sitter::Range| { let start = range.start_byte; let end = range.end_byte; @@ -488,7 +490,10 @@ impl Ast { /// Prepend a child id to the given field of the given node. pub fn prepend_field_child(&mut self, node_id: Id, field_id: FieldId, value_id: Id) { - let node = self.nodes.get_mut(node_id).expect("prepend_field_child: invalid node id"); + let node = self + .nodes + .get_mut(node_id) + .expect("prepend_field_child: invalid node id"); node.fields.entry(field_id).or_default().insert(0, value_id); } @@ -700,18 +705,118 @@ impl From for NodeContent { } } -/// The transform function for a rule: takes the AST, captured variables, a -/// fresh-name scope, and the source range of the matched node, and returns -/// the IDs of the replacement nodes. -pub type Transform = Box< - dyn Fn(&mut Ast, Captures, &tree_builder::FreshScope, Option) -> Vec +/// A handle that lets a rule transform recursively translate AST nodes via +/// the framework's rule machinery. Constructed by the driver and passed as +/// the last argument of every [`Transform`] invocation. +/// +/// The `rule!` macro uses [`TranslatorHandle::auto_translate_captures`] in +/// its generated prefix to translate captures before running the user's +/// transform body. Manually-written transforms (using [`Rule::new`] +/// directly) can call [`TranslatorHandle::translate`] selectively on +/// specific node ids to control when translation happens. +pub struct TranslatorHandle<'a, C> { + inner: TranslatorImpl<'a, C>, +} + +/// Internal phase-specific translation state. Kept private — callers +/// interact with [`TranslatorHandle`] only. +enum TranslatorImpl<'a, C> { + /// OneShot phase translator: recursively applies OneShot rules. 
+ OneShot { + index: &'a RuleIndex<'a, C>, + fresh: &'a tree_builder::FreshScope, + rewrite_depth: usize, + /// The id of the node the current rule is matching. Used by + /// [`auto_translate_captures`] to avoid infinite recursion when a + /// rule captures its own match root (e.g. via `(_) @_`). + matched_root: Id, + }, + /// Repeating phase translator: translation is not meaningful here + /// (input and output schemas are the same). [`translate`] errors; + /// [`auto_translate_captures`] is a no-op so the macro's auto-prefix + /// works unchanged for Repeating rules. + Repeating, +} + +impl<'a, C: Clone> TranslatorHandle<'a, C> { + /// Recursively apply OneShot rules to `id` and return the resulting + /// node ids. Errors in a Repeating phase (where translation is not + /// meaningful). + pub fn translate(&self, ast: &mut Ast, user_ctx: &mut C, id: Id) -> Result, String> { + match &self.inner { + TranslatorImpl::OneShot { + index, + fresh, + rewrite_depth, + .. + } => apply_one_shot_rules_inner(index, ast, user_ctx, id, fresh, rewrite_depth + 1), + TranslatorImpl::Repeating => { + Err("translate() is not available in a Repeating phase".into()) + } + } + } + + /// Translate every captured node in `captures` in place (OneShot phase + /// only). In a Repeating phase this is a no-op — Repeating rules + /// receive raw captures. + /// + /// Used by the `rule!` macro's generated prefix to preserve the + /// pre-existing "auto-translate captures before running the transform + /// body" behavior. Manually-written transforms typically translate + /// captures selectively via [`translate`] instead. + /// + /// To avoid infinite recursion, a capture whose id matches the rule's + /// matched root (e.g. from a `(_) @_` pattern) is left unchanged. + pub fn auto_translate_captures( + &self, + captures: &mut Captures, + ast: &mut Ast, + user_ctx: &mut C, + ) -> Result<(), String> { + match &self.inner { + TranslatorImpl::OneShot { matched_root, .. 
} => { + let root = *matched_root; + captures.try_map_all_captures(|cid| { + if cid == root { + Ok(vec![cid]) + } else { + self.translate(ast, user_ctx, cid) + } + }) + } + TranslatorImpl::Repeating => Ok(()), + } + } +} + +/// The transform function for a rule. +/// +/// Takes the AST, the (raw, untranslated) captured variables, a fresh-name +/// scope, the source range of the matched node, a mutable reference to the +/// user context of type `C`, and a [`TranslatorHandle`] for recursively +/// translating nodes. Returns the IDs of the replacement nodes, or an +/// error message if the transform could not be completed. +/// +/// Transforms produced by [`Rule::new`] receive **raw** captures and must +/// translate them themselves (via the handle). Transforms produced by the +/// `rule!` macro have an auto-translation prefix injected for backward +/// compatibility. +pub type Transform = Box< + dyn Fn( + &mut Ast, + Captures, + &tree_builder::FreshScope, + Option, + &mut C, + TranslatorHandle<'_, C>, + ) -> Result, String> + Send + Sync, >; -pub struct Rule { +pub struct Rule { query: QueryNode, - transform: Transform, + transform: Transform, /// If true, after this rule fires on a node the engine will try to /// re-apply this same rule on the result root. Defaults to false: /// each rule fires at most once on a given node, which prevents @@ -719,8 +824,8 @@ pub struct Rule { repeated: bool, } -impl Rule { - pub fn new(query: QueryNode, transform: Transform) -> Self { +impl Rule { + pub fn new(query: QueryNode, transform: Transform) -> Self { Self { query, transform, @@ -742,9 +847,13 @@ impl Rule { ast: &mut Ast, node: Id, fresh: &tree_builder::FreshScope, + user_ctx: &mut C, + translator: TranslatorHandle<'_, C>, ) -> Result>, String> { match self.try_match(ast, node)? 
{ - Some(captures) => Ok(Some(self.run_transform(ast, captures, node, fresh))), + Some(captures) => Ok(Some( + self.run_transform(ast, captures, node, fresh, user_ctx, translator)?, + )), None => Ok(None), } } @@ -768,29 +877,31 @@ impl Rule { captures: Captures, node: Id, fresh: &tree_builder::FreshScope, - ) -> Vec { + user_ctx: &mut C, + translator: TranslatorHandle<'_, C>, + ) -> Result, String> { fresh.next_scope(); let source_range = ast.get_node(node).and_then(|n| match n.content { NodeContent::Range(r) => Some(r), _ => n.source_range, }); - (self.transform)(ast, captures, fresh, source_range) + (self.transform)(ast, captures, fresh, source_range, user_ctx, translator) } } const MAX_REWRITE_DEPTH: usize = 100; /// Index of rules by their root query kind for fast lookup. -struct RuleIndex<'a> { +struct RuleIndex<'a, C> { /// Rules indexed by root node kind name. - by_kind: BTreeMap<&'static str, Vec<&'a Rule>>, + by_kind: BTreeMap<&'static str, Vec<&'a Rule>>, /// Rules with wildcard queries (Any) that apply to all nodes. 
- wildcard: Vec<&'a Rule>, + wildcard: Vec<&'a Rule>, } -impl<'a> RuleIndex<'a> { - fn new(rules: &'a [Rule]) -> Self { - let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule>> = BTreeMap::new(); +impl<'a, C> RuleIndex<'a, C> { + fn new(rules: &'a [Rule]) -> Self { + let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule>> = BTreeMap::new(); let mut wildcard = Vec::new(); for rule in rules { match rule.query.root_kind() { @@ -801,7 +912,7 @@ impl<'a> RuleIndex<'a> { Self { by_kind, wildcard } } - fn rules_for_kind(&self, kind: &str) -> impl Iterator { + fn rules_for_kind(&self, kind: &str) -> impl Iterator> { self.by_kind .get(kind) .into_iter() @@ -810,23 +921,25 @@ impl<'a> RuleIndex<'a> { } } -fn apply_repeating_rules( - rules: &[Rule], +fn apply_repeating_rules( + rules: &[Rule], ast: &mut Ast, + user_ctx: &mut C, id: Id, fresh: &tree_builder::FreshScope, ) -> Result, String> { let index = RuleIndex::new(rules); - apply_repeating_rules_inner(&index, ast, id, fresh, 0, None) + apply_repeating_rules_inner(&index, ast, user_ctx, id, fresh, 0, None) } -fn apply_repeating_rules_inner( - index: &RuleIndex, +fn apply_repeating_rules_inner( + index: &RuleIndex, ast: &mut Ast, + user_ctx: &mut C, id: Id, fresh: &tree_builder::FreshScope, rewrite_depth: usize, - skip_rule: Option<*const Rule>, + skip_rule: Option<*const Rule>, ) -> Result, String> { if rewrite_depth > MAX_REWRITE_DEPTH { return Err(format!( @@ -837,11 +950,23 @@ fn apply_repeating_rules_inner( let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or(""); for rule in index.rules_for_kind(node_kind) { - let rule_ptr = *rule as *const Rule; + let rule_ptr = *rule as *const Rule; if Some(rule_ptr) == skip_rule { continue; } - if let Some(result_node) = rule.try_rule(ast, id, fresh)? { + // Snapshot the user context before invoking the rule so that any + // mutations the rule makes are visible during recursive translation + // of its result, but not leaked to the parent's siblings. 
+ let snapshot = user_ctx.clone(); + // Repeating rules don't need a real translator: their captures + // aren't auto-translated (Repeating preserves the input schema), + // and `ctx.translate(id)` errors if invoked from a Repeating + // transform. + let translator = TranslatorHandle { + inner: TranslatorImpl::Repeating, + }; + let try_result = rule.try_rule(ast, id, fresh, user_ctx, translator)?; + if let Some(result_node) = try_result { // For non-repeated rules, suppress further application of *this* // rule on the result root, so a rule whose output matches its own // query doesn't loop. Other rules and child traversal are @@ -852,14 +977,19 @@ fn apply_repeating_rules_inner( results.extend(apply_repeating_rules_inner( index, ast, + user_ctx, node, fresh, rewrite_depth + 1, next_skip, )?); } + *user_ctx = snapshot; return Ok(results); } + // Rule didn't match; restore any speculative changes (none expected + // since try_rule only mutates on match, but be defensive). + *user_ctx = snapshot; } // Take the parent's fields by ownership: the recursion will rewrite @@ -874,7 +1004,15 @@ fn apply_repeating_rules_inner( for children in fields.values_mut() { let mut new_children: Option> = None; for (i, &child_id) in children.iter().enumerate() { - let result = apply_repeating_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?; + let result = apply_repeating_rules_inner( + index, + ast, + user_ctx, + child_id, + fresh, + rewrite_depth, + None, + )?; let unchanged = result.len() == 1 && result[0] == child_id; match (&mut new_children, unchanged) { (None, true) => {} // unchanged so far, no allocation needed @@ -903,24 +1041,25 @@ fn apply_repeating_rules_inner( /// each visited node, recursion proceeds only through captured nodes (not /// through the input node's children directly), and an error is returned if /// no rule matches a visited node. 
-fn apply_one_shot_rules( - rules: &[Rule], +fn apply_one_shot_rules( + rules: &[Rule], ast: &mut Ast, + user_ctx: &mut C, id: Id, fresh: &tree_builder::FreshScope, ) -> Result, String> { let index = RuleIndex::new(rules); - apply_one_shot_rules_inner(&index, ast, id, fresh, 0) + apply_one_shot_rules_inner(&index, ast, user_ctx, id, fresh, 0) } -fn apply_one_shot_rules_inner( - index: &RuleIndex, +fn apply_one_shot_rules_inner( + index: &RuleIndex, ast: &mut Ast, + user_ctx: &mut C, id: Id, fresh: &tree_builder::FreshScope, rewrite_depth: usize, ) -> Result, String> { - if rewrite_depth > MAX_REWRITE_DEPTH { return Err(format!( "Desugaring exceeded maximum rewrite depth ({MAX_REWRITE_DEPTH}). \ @@ -931,22 +1070,27 @@ fn apply_one_shot_rules_inner( let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or(""); for rule in index.rules_for_kind(node_kind) { - if let Some(mut captures) = rule.try_match(ast, id)? { - // Recursively translate every captured node before invoking the - // transform. The transform's output uses output-schema kinds, so - // we must translate captured input-schema nodes to their - // output-schema equivalents first. - captures.try_map_all_captures(|captured_id| { - // Avoid infinite recursion when a capture refers to the root - // node of the matched tree (e.g. an `@_` capture on the - // pattern root): re-analyzing it would match the same rule - // again indefinitely. - if captured_id == id { - return Ok(vec![captured_id]); - } - apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1) - })?; - return Ok(rule.run_transform(ast, captures, id, fresh)); + if let Some(captures) = rule.try_match(ast, id)? { + // Snapshot the user context before invoking the rule so that any + // mutations the rule (or its transitively-translated captures) + // make are visible during this rule's transform, but not leaked + // to the parent's siblings. 
+ let snapshot = user_ctx.clone(); + // Build the translator handle the transform will use to + // recursively translate captures (or, for macro-generated + // rules, the auto-translate prefix uses it to translate every + // capture up front, preserving the legacy behavior). + let translator = TranslatorHandle { + inner: TranslatorImpl::OneShot { + index, + fresh, + rewrite_depth, + matched_root: id, + }, + }; + let result = rule.run_transform(ast, captures, id, fresh, user_ctx, translator)?; + *user_ctx = snapshot; + return Ok(result); } } @@ -974,15 +1118,15 @@ pub enum PhaseKind { /// starts. Rules within a phase compete for matches as usual; rules in /// different phases never compete because each traversal only considers the /// current phase's rules. -pub struct Phase { +pub struct Phase { /// Name used in error messages. pub name: String, - pub rules: Vec, + pub rules: Vec>, pub kind: PhaseKind, } -impl Phase { - pub fn new(name: impl Into, kind: PhaseKind, rules: Vec) -> Self { +impl Phase { + pub fn new(name: impl Into, kind: PhaseKind, rules: Vec>) -> Self { Self { name: name.into(), rules, @@ -1008,17 +1152,30 @@ impl Phase { /// .add_phase("desugar", PhaseKind::Repeating, desugar_rules) /// .with_output_node_types_yaml(yaml); /// ``` -#[derive(Default)] -pub struct DesugaringConfig { +/// +/// The optional type parameter `C` is the user context type threaded through +/// rule transforms. Defaults to `()` (no user context). +pub struct DesugaringConfig { /// Phases of rule application, applied in order. - pub phases: Vec, + pub phases: Vec>, /// Output node-types in YAML format. If `None`, the input grammar's /// node types are used (i.e. the desugared AST has the same node types /// as the tree-sitter grammar). pub output_node_types_yaml: Option<&'static str>, } -impl DesugaringConfig { +// Manual `Default` impl so users with a custom `C` that doesn't implement +// `Default` can still construct an empty config. 
+impl Default for DesugaringConfig { + fn default() -> Self { + Self { + phases: Vec::new(), + output_node_types_yaml: None, + } + } +} + +impl DesugaringConfig { /// Create an empty configuration. Add phases via [`add_phase`] and an /// optional output schema via [`with_output_node_types_yaml`]. pub fn new() -> Self { @@ -1030,7 +1187,7 @@ impl DesugaringConfig { mut self, name: impl Into, kind: PhaseKind, - rules: Vec, + rules: Vec>, ) -> Self { self.phases.push(Phase::new(name, kind, rules)); self @@ -1052,15 +1209,15 @@ impl DesugaringConfig { } } -pub struct Runner<'a> { +pub struct Runner<'a, C = ()> { language: tree_sitter::Language, schema: schema::Schema, - phases: &'a [Phase], + phases: &'a [Phase], } -impl<'a> Runner<'a> { +impl<'a, C> Runner<'a, C> { /// Create a runner using the input grammar's schema for output. - pub fn new(language: tree_sitter::Language, phases: &'a [Phase]) -> Self { + pub fn new(language: tree_sitter::Language, phases: &'a [Phase]) -> Self { let schema = schema::Schema::from_language(&language); Self { language, @@ -1073,7 +1230,7 @@ impl<'a> Runner<'a> { pub fn with_schema( language: tree_sitter::Language, schema: &schema::Schema, - phases: &'a [Phase], + phases: &'a [Phase], ) -> Self { Self { language, @@ -1085,7 +1242,7 @@ impl<'a> Runner<'a> { /// Create a runner from a [`DesugaringConfig`]. pub fn from_config( language: tree_sitter::Language, - config: &'a DesugaringConfig, + config: &'a DesugaringConfig, ) -> Result { let schema = config.build_schema(&language)?; Ok(Self { @@ -1094,11 +1251,17 @@ impl<'a> Runner<'a> { phases: &config.phases, }) } +} - pub fn run_from_tree( +impl<'a, C: Clone> Runner<'a, C> { + /// Parse `tree` against `source` and run all phases, threading + /// `user_ctx` through every rule transform. The caller owns the + /// initial context state. 
+ pub fn run_from_tree_with_ctx( &self, tree: &tree_sitter::Tree, source: &[u8], + user_ctx: &mut C, ) -> Result { let mut ast = Ast::from_tree_with_schema_and_source( self.schema.clone(), @@ -1106,11 +1269,13 @@ impl<'a> Runner<'a> { &self.language, source.to_vec(), ); - self.run_phases(&mut ast)?; + self.run_phases(&mut ast, user_ctx)?; Ok(ast) } - pub fn run(&self, input: &str) -> Result { + /// Parse `input` and run all phases, threading `user_ctx` through + /// every rule transform. The caller owns the initial context state. + pub fn run_with_ctx(&self, input: &str, user_ctx: &mut C) -> Result { let mut parser = tree_sitter::Parser::new(); parser .set_language(&self.language) @@ -1124,20 +1289,24 @@ impl<'a> Runner<'a> { &self.language, input.as_bytes().to_vec(), ); - self.run_phases(&mut ast)?; + self.run_phases(&mut ast, user_ctx)?; Ok(ast) } /// Apply each phase in turn to the AST, threading the root through. /// A single `FreshScope` is shared across phases so that fresh /// identifiers generated in different phases don't collide. 
- fn run_phases(&self, ast: &mut Ast) -> Result<(), String> { + fn run_phases(&self, ast: &mut Ast, user_ctx: &mut C) -> Result<(), String> { let fresh = tree_builder::FreshScope::new(); let mut root = ast.get_root(); for phase in self.phases { let res = match phase.kind { - PhaseKind::Repeating => apply_repeating_rules(&phase.rules, ast, root, &fresh), - PhaseKind::OneShot => apply_one_shot_rules(&phase.rules, ast, root, &fresh), + PhaseKind::Repeating => { + apply_repeating_rules(&phase.rules, ast, user_ctx, root, &fresh) + } + PhaseKind::OneShot => { + apply_one_shot_rules(&phase.rules, ast, user_ctx, root, &fresh) + } } .map_err(|e| format!("Phase `{}`: {e}", phase.name))?; if res.len() != 1 { @@ -1153,3 +1322,78 @@ impl<'a> Runner<'a> { Ok(()) } } + +impl<'a, C: Clone + Default> Runner<'a, C> { + /// Parse `tree` against `source` and run all phases, using the + /// default context (`C::default()`) as the initial context state. + pub fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result { + let mut user_ctx = C::default(); + self.run_from_tree_with_ctx(tree, source, &mut user_ctx) + } + + /// Parse `input` and run all phases, using the default context + /// (`C::default()`) as the initial context state. + pub fn run(&self, input: &str) -> Result { + let mut user_ctx = C::default(); + self.run_with_ctx(input, &mut user_ctx) + } +} + +// --------------------------------------------------------------------------- +// Desugarer: type-erased view of a DesugaringConfig + Runner +// --------------------------------------------------------------------------- + +/// Type-erased interface to a desugaring pipeline for a single language. +/// +/// Consumers (e.g. a generic tree-sitter extractor) hold +/// `Box` so they can dispatch through the trait without +/// knowing the user context type `C` that's internal to yeast. +/// +/// Construct one via [`ConcreteDesugarer::new`] from a +/// [`DesugaringConfig`] and a [`tree_sitter::Language`]. 
+pub trait Desugarer: Send + Sync { + /// The output AST schema (in YAML format), or `None` if the input + /// grammar's schema should be used. + fn output_node_types_yaml(&self) -> Option<&'static str>; + + /// Parse `tree` against `source` and run the desugaring pipeline. + /// Each call constructs a fresh default user context internally. + fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result; +} + +/// A concrete [`Desugarer`] backed by a [`DesugaringConfig`] for a +/// specific user context type `C`. Stores the language and a pre-built +/// schema so that per-call cost is bounded to constructing a transient +/// [`Runner`] and cloning the schema (no YAML re-parsing). +pub struct ConcreteDesugarer { + language: tree_sitter::Language, + schema: schema::Schema, + config: DesugaringConfig, +} + +impl ConcreteDesugarer { + /// Build a desugarer for `language` from `config`. Parses the output + /// schema YAML once (if set) and stores it for reuse across files. + pub fn new( + language: tree_sitter::Language, + config: DesugaringConfig, + ) -> Result { + let schema = config.build_schema(&language)?; + Ok(Self { + language, + schema, + config, + }) + } +} + +impl Desugarer for ConcreteDesugarer { + fn output_node_types_yaml(&self) -> Option<&'static str> { + self.config.output_node_types_yaml + } + + fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result { + let runner = Runner::with_schema(self.language.clone(), &self.schema, &self.config.phases); + runner.run_from_tree(tree, source) + } +} diff --git a/shared/yeast/src/node_types_yaml.rs b/shared/yeast/src/node_types_yaml.rs index 797f14cba720..f4d9f2a1c427 100644 --- a/shared/yeast/src/node_types_yaml.rs +++ b/shared/yeast/src/node_types_yaml.rs @@ -242,10 +242,7 @@ pub fn convert(yaml_input: &str) -> Result { /// Apply YAML node-type definitions to a mutable Schema. /// Registers all types, fields, and allowed types from the YAML into the schema. 
-fn apply_yaml_to_schema( - yaml: &YamlNodeTypes, - schema: &mut crate::schema::Schema, -) { +fn apply_yaml_to_schema(yaml: &YamlNodeTypes, schema: &mut crate::schema::Schema) { // Register all supertypes as node kinds for name in yaml.supertypes.keys() { schema.register_kind(name); @@ -307,7 +304,8 @@ fn apply_yaml_to_schema( .into_vec() .into_iter() .map(|type_ref| { - let (kind, named) = resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types); + let (kind, named) = + resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types); crate::schema::NodeType { kind, named } }) .collect::>(); diff --git a/shared/yeast/src/schema.rs b/shared/yeast/src/schema.rs index bbd425f15a2c..da13bb8b6b70 100644 --- a/shared/yeast/src/schema.rs +++ b/shared/yeast/src/schema.rs @@ -198,13 +198,8 @@ impl Schema { .insert((parent_kind.to_string(), field_id), node_types); } - pub fn field_types( - &self, - parent_kind: &str, - field_id: FieldId, - ) -> Option<&Vec> { - self.field_types - .get(&(parent_kind.to_string(), field_id)) + pub fn field_types(&self, parent_kind: &str, field_id: FieldId) -> Option<&Vec> { + self.field_types.get(&(parent_kind.to_string(), field_id)) } pub fn set_field_cardinality( diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index 069132d09237..99471f129abf 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -7,7 +7,7 @@ const OUTPUT_SCHEMA_YAML: &str = include_str!("node-types.yml"); /// Helper: parse Ruby source with no rules, return dump. 
fn parse_and_dump(input: &str) -> String { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run(input).unwrap(); dump_ast(&ast, ast.get_root(), input) } @@ -24,7 +24,7 @@ fn run_and_ast(input: &str, rules: Vec) -> Ast { let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); runner.run(input).unwrap() } @@ -34,7 +34,7 @@ fn run_phased_and_dump(input: &str, phases: Vec) -> String { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let ast = runner.run(input).unwrap(); dump_ast(&ast, ast.get_root(), input) } @@ -46,7 +46,7 @@ fn run_and_get_error(input: &str, rules: Vec) -> String { let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); runner .run(input) .expect_err("expected runner to return an error") @@ -54,7 +54,7 @@ fn run_and_get_error(input: &str, rules: Vec) -> String { /// Helper: parse Ruby source with no rules and dump with schema type errors. 
fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run(input).unwrap(); let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap(); dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema) @@ -64,10 +64,10 @@ fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String { /// building schema with language IDs so field checks align with parser fields. fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); - let runner = Runner::new(lang.clone(), &[]); + let runner: Runner = Runner::new(lang.clone(), &[]); let ast = runner.run(input).unwrap(); - let schema = yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang) - .unwrap(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang).unwrap(); dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema) } @@ -76,7 +76,7 @@ fn run_and_dump_typed(input: &str, rules: Vec, schema_yaml: &str) -> Strin let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap(); let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let ast = runner.run(input).unwrap(); dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema) } @@ -166,7 +166,7 @@ fn test_parse_for_loop() { #[test] fn test_dump_highlights_type_errors_inline() { - let schema_yaml = r#" + let schema_yaml = r#" named: program: $children*: assignment @@ -176,13 +176,13 @@ named: identifier: "#; - let dump = parse_and_dump_typed("x = 1", schema_yaml); - assert!(dump.contains("integer \"1\" <-- 
ERROR:")); + let dump = parse_and_dump_typed("x = 1", schema_yaml); + assert!(dump.contains("integer \"1\" <-- ERROR:")); } #[test] fn test_dump_reports_preserved_unknown_kind_after_transformation() { - let schema_yaml = r#" + let schema_yaml = r#" named: program: $children*: assignment @@ -192,25 +192,25 @@ named: identifier: "#; - // This rewrite runs and preserves the RHS node kind via capture. - // With schema above, preserving `integer` should be reported inline. - let rules = vec![yeast::rule!( - (assignment left: (_) @left right: (_) @right) - => - (assignment - left: {left} - right: {right} - ) - )]; + // This rewrite runs and preserves the RHS node kind via capture. + // With schema above, preserving `integer` should be reported inline. + let rules: Vec = vec![yeast::rule!( + (assignment left: (_) @left right: (_) @right) + => + (assignment + left: {left} + right: {right} + ) + )]; - let dump = run_and_dump_typed("x = 1", rules, schema_yaml); - assert!(dump.contains("integer \"1\" <-- ERROR:")); - assert!(dump.contains("node kind 'integer' not in schema")); + let dump = run_and_dump_typed("x = 1", rules, schema_yaml); + assert!(dump.contains("integer \"1\" <-- ERROR:")); + assert!(dump.contains("node kind 'integer' not in schema")); } #[test] fn test_dump_reports_undeclared_field_on_node() { - let schema_yaml = r#" + let schema_yaml = r#" named: program: $children*: assignment @@ -219,14 +219,14 @@ named: identifier: "#; - let dump = parse_and_dump_typed_with_language("x = y", schema_yaml); - assert!(dump.contains("right: identifier \"y\" <-- ERROR:")); - assert!(dump.contains("the node 'assignment' has no field 'right'")); + let dump = parse_and_dump_typed_with_language("x = y", schema_yaml); + assert!(dump.contains("right: identifier \"y\" <-- ERROR:")); + assert!(dump.contains("the node 'assignment' has no field 'right'")); } #[test] fn test_dump_reports_disallowed_kind_in_field_type() { - let schema_yaml = r#" + let schema_yaml = r#" named: program: 
$children*: assignment @@ -237,17 +237,17 @@ named: integer: "#; - let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml); - assert!(dump.contains("right: integer \"1\" <-- ERROR:")); - assert!(dump.contains("should contain")); - assert!(dump.contains("but got integer")); + let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml); + assert!(dump.contains("right: integer \"1\" <-- ERROR:")); + assert!(dump.contains("should contain")); + assert!(dump.contains("but got integer")); } // ---- Query tests ---- #[test] fn test_query_match() { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let query = yeast::query!( @@ -268,7 +268,7 @@ fn test_query_match() { #[test] fn test_query_no_match() { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let query = yeast::query!( @@ -293,7 +293,7 @@ fn test_query_skips_extras_in_positional_match() { // captured comment to nothing (a common idiom, e.g. // `(comment) => ()` in Swift) leaves the capture's match-list empty // and causes the transform to fail with "Variable X has 0 matches". - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("[1, # comment\n2]").unwrap(); // Navigate to the `array` node: program -> array. 
@@ -309,15 +309,11 @@ fn test_query_skips_extras_in_positional_match() { let matched = query.do_match(&ast, array_id, &mut captures).unwrap(); assert!(matched); assert_eq!( - ast.get_node(captures.get_var("a").unwrap()) - .unwrap() - .kind(), + ast.get_node(captures.get_var("a").unwrap()).unwrap().kind(), "integer" ); assert_eq!( - ast.get_node(captures.get_var("b").unwrap()) - .unwrap() - .kind(), + ast.get_node(captures.get_var("b").unwrap()).unwrap().kind(), "integer" ); } @@ -325,14 +321,14 @@ fn test_query_skips_extras_in_positional_match() { #[test] fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); - let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang) - .unwrap(); - let phases = vec![Phase::new( + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); + let phases: Vec = vec![Phase::new( "test", PhaseKind::Repeating, vec![yeast::rule!((integer) => (identifier "replaced"))], )]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let input = "x = 1"; let ast = runner.run(input).unwrap(); @@ -350,7 +346,7 @@ fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() { #[test] fn test_query_repeated_capture() { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x, y, z = 1").unwrap(); let query = yeast::query!( @@ -375,7 +371,7 @@ fn test_query_repeated_capture() { #[test] fn test_capture_unnamed_node_parenthesized() { // `("=") @op` captures the unnamed `=` token between left and right. 
- let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let query = yeast::query!( @@ -403,7 +399,7 @@ fn test_capture_unnamed_node_parenthesized() { fn test_capture_bare_underscore_repeated() { // `_` matches named and unnamed nodes in bare-child position. On this // assignment shape, bare children correspond to unnamed tokens (the `=`). - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let query = yeast::query!((assignment _* @all)); @@ -425,7 +421,7 @@ fn test_capture_bare_underscore_repeated() { #[test] fn test_capture_unnamed_node_bare_literal() { // `"=" @op` (without surrounding parens) is the same as `("=") @op`. - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let query = yeast::query!( @@ -454,7 +450,7 @@ fn test_bare_underscore_matches_unnamed() { // Bare `_` matches any node, including unnamed tokens, while `(_)` // matches only named nodes. Demonstrate by matching the unnamed `=` // token in the implicit `child` field of an `assignment`. - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let mut cursor = AstCursor::new(&ast); @@ -493,7 +489,7 @@ fn test_bare_forms_in_field_position() { // field's value, not just in the bare-children position. This is // syntactic sugar for `(_)` / `("…")` and goes through the same // code paths. 
- let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let mut cursor = AstCursor::new(&ast); @@ -532,7 +528,7 @@ fn test_forward_scan_finds_unnamed_token_late() { // query for `("end")` skip past the first two and match the third. // Without forward-scan, the matcher took the first child unconditionally // and failed. - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("for x in list do\n y\nend").unwrap(); // Navigate: program > for > do (the body wrapper). @@ -559,7 +555,7 @@ fn test_forward_scan_preserves_order() { // order. A query for ("end") then ("do") should fail because `do` // appears before `end` in the source order; once forward-scan has // consumed `end`, the iterator is exhausted. - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("for x in list do\n y\nend").unwrap(); let mut cursor = AstCursor::new(&ast); @@ -580,7 +576,7 @@ fn test_forward_scan_preserves_order() { #[test] fn test_tree_builder() { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let mut ast = runner.run("x = 1").unwrap(); let input = "x = 1"; @@ -598,7 +594,8 @@ fn test_tree_builder() { // Swap left and right let fresh = yeast::tree_builder::FreshScope::new(); - let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh); + let mut user_ctx = (); + let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh, &mut user_ctx); let new_id = yeast::tree!(ctx, (program child: (assignment @@ -626,7 +623,7 @@ fn test_tree_builder() { // tree-sitter-ruby grammar with named fields for nodes that only have // unnamed children in 
tree-sitter (e.g. block_body.stmt, block_parameters.parameter). fn ruby_rules() -> Vec { - let assign_rule = yeast::rule!( + let assign_rule: Rule = yeast::rule!( (assignment left: (left_assignment_list (identifier)* @left @@ -651,7 +648,7 @@ fn ruby_rules() -> Vec { )} ); - let for_rule = yeast::rule!( + let for_rule: Rule = yeast::rule!( (for pattern: (_) @pat value: (in (_) @val) @@ -733,7 +730,7 @@ fn test_desugar_for_loop() { #[test] fn test_shorthand_rule() { - let rule = yeast::rule!( + let rule: Rule = yeast::rule!( (assignment left: (_) @method right: (_) @receiver @@ -885,7 +882,7 @@ fn test_phase_error_includes_phase_name() { PhaseKind::Repeating, vec![swap_assignment_rule().repeated()], )]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let err = runner .run("x = 1") .expect_err("expected runner to return an error"); @@ -928,7 +925,7 @@ fn test_one_shot_phase() { PhaseKind::OneShot, one_shot_xeq1_rules(), )]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let input = "x = 1"; let ast = runner.run(input).unwrap(); @@ -954,7 +951,7 @@ fn test_one_shot_phase_errors_when_no_rule_matches() { let mut rules = one_shot_xeq1_rules(); rules.pop(); let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let err = runner .run("x = 1") @@ -978,7 +975,7 @@ fn test_one_shot_recurses_into_returned_capture() { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); - let rules = vec![ + let rules: Vec = vec![ yeast::rule!( (program (_)* @stmts) => @@ -994,7 +991,7 @@ fn test_one_shot_recurses_into_returned_capture() { yeast::rule!((integer) => (integer 
"INT")), ]; let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let input = "x = 1"; let ast = runner.run(input).unwrap(); @@ -1020,7 +1017,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); - let rules = vec![ + let rules: Vec = vec![ yeast::rule!( (program (_)* @stmts) => @@ -1041,7 +1038,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() { yeast::rule!((integer) => (integer "INT")), ]; let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let input = "x = 1"; let ast = runner.run(input).unwrap(); @@ -1065,7 +1062,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() { #[test] fn test_cursor_navigation() { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let mut cursor = AstCursor::new(&ast); @@ -1139,7 +1136,7 @@ fn test_desugar_for_with_multiple_assignment() { /// resolves to the captured node's source text via `YeastDisplay`. #[test] fn test_hash_brace_renders_capture_source_text() { - let rule = rule!( + let rule: Rule = rule!( (call method: (identifier) @name receiver: (identifier) @recv @@ -1168,7 +1165,7 @@ fn test_hash_brace_renders_capture_source_text() { /// `Display` impl (covered by `YeastDisplay`'s blanket impls for primitives). 
#[test] fn test_hash_brace_renders_integer_expression() { - let rule = rule!( + let rule: Rule = rule!( (identifier) @_ => (identifier #{1 + 2}) @@ -1187,7 +1184,7 @@ fn test_hash_brace_renders_integer_expression() { /// source location, not the full source range of the matched rule root. #[test] fn test_hash_brace_uses_capture_location_for_leaf() { - let rule = rule!( + let rule: Rule = rule!( (call method: (identifier) @name receiver: (identifier) @recv @@ -1204,7 +1201,9 @@ fn test_hash_brace_uses_capture_location_for_leaf() { let mut bar_ids: Vec = Vec::new(); for id in ast.reachable_node_ids() { - let Some(node) = ast.get_node(id) else { continue; }; + let Some(node) = ast.get_node(id) else { + continue; + }; if node.kind() == "identifier" && ast.source_text(id) == "bar" { bar_ids.push(id); } diff --git a/unified/extractor/src/extractor.rs b/unified/extractor/src/extractor.rs index 7601fa8addbe..301c6cf533f4 100644 --- a/unified/extractor/src/extractor.rs +++ b/unified/extractor/src/extractor.rs @@ -1,9 +1,9 @@ use clap::Args; use std::path::PathBuf; +use crate::languages; use codeql_extractor::extractor::simple; use codeql_extractor::trap; -use crate::languages; #[derive(Args)] pub struct Options { @@ -35,7 +35,9 @@ pub fn run(options: Options) -> std::io::Result<()> { prefix: "unified".to_string(), languages, trap_dir: options.output_dir, - trap_compression: trap::Compression::from_env("CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION"), + trap_compression: trap::Compression::from_env( + "CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION", + ), source_archive_dir: options.source_archive_dir, file_lists: vec![options.file_list], }; diff --git a/unified/extractor/src/generator.rs b/unified/extractor/src/generator.rs index cbf971a8ff25..974de5dbca97 100644 --- a/unified/extractor/src/generator.rs +++ b/unified/extractor/src/generator.rs @@ -22,14 +22,19 @@ pub fn run(options: Options) -> std::io::Result<()> { // The QL-visible schema is the unified output AST, 
not the per-language // input grammars. Pass it via `desugar.output_node_types_yaml` so the // generator converts the YAML to JSON node-types. - let desugar = yeast::DesugaringConfig::new() - .with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA); + let desugar = + yeast::DesugaringConfig::new().with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA); let languages = vec![Language { name: "Unified".to_owned(), - node_types: "", // unused: generator picks up output_node_types_yaml above + node_types: "", // unused: generator picks up output_node_types_yaml above desugar: Some(desugar), }]; - generate(languages, options.dbscheme, options.library, "run unified/scripts/create-extractor-pack.sh") + generate( + languages, + options.dbscheme, + options.library, + "run unified/scripts/create-extractor-pack.sh", + ) } diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index 79f0e65b02f5..c84e3cf38676 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -1,7 +1,98 @@ use codeql_extractor::extractor::simple; -use yeast::{rule, DesugaringConfig, PhaseKind}; +use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, manual_rule, rule, tree}; -fn translation_rules() -> Vec { +/// User context propagated from outer rules down to the inner rules that +/// emit the corresponding output declarations, so that each emitted node +/// is born with the outer information (name, type, modifiers, etc.) +/// already set — no schema-invalid intermediate state requiring +/// post-hoc mutation. +#[derive(Clone, Default)] +struct SwiftContext { + /// Identifier node for the property name. 
Set by the outer + /// `property_binding` (computed accessors / willSet-didSet) and + /// `protocol_property_declaration` rules before translating accessor + /// children; read by the accessor inner rules + /// (`computed_getter`/`computed_setter`/`computed_modify`/ + /// `willset_clause`/`didset_clause`/`getter_specifier`/ + /// `setter_specifier`). + property_name: Option, + /// Translated type node for the property type. Set by the outer + /// `property_binding` rule (computed accessors variant) and + /// `protocol_property_declaration` when present; read by the + /// accessor inner rules. + property_type: Option, + /// Default-value expression for the next translated `parameter`. Set + /// by the outer `function_parameter` rule; read by the `parameter` + /// rules. + default_value: Option, + /// Translated outer modifiers (e.g. visibility, attributes) to + /// attach to each child of a flattening outer rule. Set by + /// `property_declaration`, `enum_entry`, and + /// `protocol_property_declaration`. + outer_modifiers: Vec, + /// The `let`/`var` binding modifier for a `property_declaration`. + /// Set by `property_declaration`; read by the inner declaration + /// rules (`property_binding` variants, accessor rules) so they + /// emit it as part of the output node's `modifier:` field. + binding_modifier: Option, + /// True when the current child of a flattening outer rule is not + /// the first one — its inner rule should emit a + /// `chained_declaration` modifier so the original grouping can be + /// recovered downstream. + is_chained: bool, +} + +/// Build a freshly-created `chained_declaration` modifier node if +/// `ctx.is_chained`, else `None`. Used by inner declaration rules to +/// emit the chained tag for non-first children of a flattening outer +/// rule. Returns `Option` so it splices via `{..…}` to 0 or 1 ids. 
+fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Option { + if ctx.is_chained { + Some(ctx.literal("modifier", "chained_declaration")) + } else { + None + } +} + +/// Combine a list of boolean sub-conditions into a single expression by +/// left-folding with the infix `&&` operator. Used by control-flow +/// rules (`if`, `guard`, `while`, `repeat-while`) whose tree-sitter +/// nodes carry one or more comma-separated conditions that the target +/// AST represents as a single `condition:` field. Panics on an empty +/// input because every caller's grammar guarantees at least one +/// condition. +fn and_chain( + ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>, + conds: Vec, +) -> yeast::Id { + conds.into_iter() + .map(yeast::Id::from) + .reduce(|acc, elem| { + tree!((binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) + }) + .expect("control-flow statement must have at least one condition") +} + +/// Translate a multi-part identifier (for example `Foo.Bar.Baz`) into a +/// `member_access_expr` chain rooted at a `name_expr` over the first +/// part. Panics on an empty input because the grammar's `_+` quantifier +/// guarantees at least one part. +fn member_chain( + ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>, + parts: Vec, +) -> yeast::Id { + let mut iter = parts.into_iter(); + let first = iter + .next() + .expect("identifier with `part:` must have at least one part"); + let init = tree!((name_expr identifier: (identifier #{first}))); + iter.fold( + init, + |acc, elem| tree!((member_access_expr base: {acc} member: (identifier #{elem}))), + ) +} + +fn translation_rules() -> Vec> { vec![ // ---- Top-level ---- // Capture all top-level statements, including unnamed tokens like `nil`. @@ -88,32 +179,49 @@ fn translation_rules() -> Vec { // nodes for individual declarators. The outer property_declaration rule splices these out // and attaches binding/modifiers from the parent. 
- // Computed property with explicit accessors (get/set/modify) → - // a sequence of accessor_declaration nodes, each with the property name - // attached. Subsequent accessors will be tagged chained_declaration by - // the outer property_declaration rule. - rule!( + // Computed property with explicit accessors (get/set/modify) → a + // sequence of `accessor_declaration` nodes. The outer rule + // publishes the property's name and type into `ctx` so that each + // inner accessor rule + // (`computed_getter`/`computed_setter`/`computed_modify`) builds + // its `accessor_declaration` with `name` and `type` set from the + // start — no schema-invalid intermediate state. + // + // Toggles `ctx.is_chained` per accessor iteration: the first + // accessor inherits the outer rule's chained state (i.e. whether + // this whole property_binding is itself a non-first declarator + // of a containing property_declaration); subsequent accessors + // always emit `chained_declaration`. + manual_rule!( (property_binding name: @pattern type: _? @ty computed_value: (computed_property accessor: _+ @accessors)) - => - {..{ - let name_text = __yeast_ctx.ast.source_text(pattern.into()); - let ty_ids: Vec = ty.iter().map(|&t| t.into()).collect(); - let acc_ids: Vec = accessors.iter().map(|&a| a.into()).collect(); - for &acc_id in &acc_ids { - let ident = __yeast_ctx.literal("identifier", &name_text); - __yeast_ctx.prepend_field(acc_id, "name", ident); - for &ty_id in ty_ids.iter().rev() { - __yeast_ctx.prepend_field(acc_id, "type", ty_id); + { + // Translate `ty` first so the context holds an + // output-schema node id. + let translated_ty = ctx.translate_opt(ty)?; + // Build the property-name identifier from the + // (untranslated) pattern leaf. 
+ let name_id = tree!((identifier #{pattern})); + + ctx.property_name = Some(name_id); + ctx.property_type = translated_ty; + + let mut result = Vec::new(); + for (i, acc) in accessors.into_iter().enumerate() { + if i > 0 { + ctx.is_chained = true; } + result.extend(ctx.translate(acc)?); } - acc_ids - }} + Ok(result) + } ), - // Computed property: shorthand getter (no explicit get/set, just statements) → - // a single accessor_declaration with kind "get". + // Computed property: shorthand getter (no explicit get/set, just + // statements) → a single accessor_declaration with kind "get". + // Reads outer modifiers / chained tag from `ctx` (set by the + // outer `property_declaration` rule). rule!( (property_binding name: (pattern bound_identifier: @name) @@ -121,49 +229,62 @@ fn translation_rules() -> Vec { computed_value: (computed_property statement: _* @body)) => (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} name: (identifier #{name}) type: {..ty} accessor_kind: (accessor_kind "get") body: (block stmt: {..body})) ), - // Stored property with willSet/didSet observers (initializer optional) → - // variable_declaration followed by one accessor_declaration per observer, - // each carrying the property name. Subsequent items are tagged - // chained_declaration by the outer property_declaration rule. - rule!( + // Stored property with willSet/didSet observers (initializer + // optional) → a `variable_declaration` followed by one + // `accessor_declaration` per observer, each born with the + // property name set. Manual rule: we publish the property name + // into `ctx` before translating the observer children so the + // inner `willset_clause` / `didset_clause` rules construct + // valid `accessor_declaration` nodes from the start. 
+ // + // The `variable_declaration` itself inherits the outer rule's + // chained state; observers always get `chained_declaration` + // because they're subsequent outputs of this flattening rule. + manual_rule!( (property_binding name: (pattern bound_identifier: @name) type: _? @ty value: _? @val observers: (willset_didset_block willset: _? @ws didset: _? @ds)) - => - {..{ - let name_text = __yeast_ctx.ast.source_text(name.into()); - let val_ids: Vec = val.iter().map(|&v| v.into()).collect(); - let ty_ids: Vec = ty.iter().map(|&t| t.into()).collect(); - let mut obs_ids: Vec = Vec::new(); - obs_ids.extend(ws.iter().map(|&o| { let id: usize = o.into(); id })); - obs_ids.extend(ds.iter().map(|&o| { let id: usize = o.into(); id })); - let ident_for_var = __yeast_ctx.literal("identifier", &name_text); - let pat = __yeast_ctx.node("name_pattern", vec![("identifier", vec![ident_for_var])]); - let mut var_fields: Vec<(&str, Vec)> = vec![("pattern", vec![pat])]; - if !ty_ids.is_empty() { - var_fields.push(("type", ty_ids)); - } - if !val_ids.is_empty() { - var_fields.push(("value", val_ids)); - } - let var_id = __yeast_ctx.node("variable_declaration", var_fields); - let mut result = vec![var_id]; - for obs_id in obs_ids { - let ident = __yeast_ctx.literal("identifier", &name_text); - __yeast_ctx.prepend_field(obs_id, "name", ident); - result.push(obs_id); + { + // Translate ty and val so the variable_declaration + // below contains output-schema nodes. + let translated_ty = ctx.translate_opt(ty)?; + let translated_val = ctx.translate_opt(val)?; + + let var_decl = tree!( + (variable_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} + pattern: (name_pattern identifier: (identifier #{name})) + type: {..translated_ty} + value: {..translated_val}) + ); + + // Publish the property name for the observer rules. 
+ ctx.property_name = Some(tree!((identifier #{name}))); + // Observers are subsequent outputs of this flattening + // rule, so they always get `chained_declaration`. + ctx.is_chained = true; + + let mut result = vec![var_decl]; + for obs in ws.into_iter().chain(ds) { + result.extend(ctx.translate(obs)?); } - result - }} + Ok(result) + } ), - // property_binding with any pattern name (identifier or destructuring) + // property_binding with any pattern name (identifier or + // destructuring). Reads outer modifiers / chained tag from `ctx`. rule!( (property_binding name: @pattern @@ -171,37 +292,44 @@ fn translation_rules() -> Vec { value: _? @val) => (variable_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} pattern: {pattern} type: {..ty} value: {..val}) ), - // property_declaration: splice declarators (each may translate to multiple nodes — - // variable_declaration and/or accessor_declaration), and attach the binding modifier - // (let/var) and any outer modifiers to each. All children after the first additionally - // get a synthetic chained_declaration modifier so the grouping can be recovered. - rule!( + // property_declaration: flatten declarators (each may translate + // to multiple nodes — variable_declaration and/or + // accessor_declaration) and attach the binding modifier + // (let/var), outer modifiers, and `chained_declaration` for + // non-first declarations. Manual rule: publishes + // binding/outer modifiers into `ctx` and translates each + // declarator with `ctx.is_chained` toggled per iteration. The + // inner declaration rules (`property_binding` variants, + // accessor inner rules) read these fields and emit complete + // `modifier:` lists from the start. 
+ manual_rule!( (property_declaration binding: (value_binding_pattern mutability: @binding_kind) declarator: _* @decls (modifiers)* @mods) - => - {..{ - let binding_text = __yeast_ctx.ast.source_text(binding_kind.into()); - let mod_ids: Vec = mods.iter().map(|&m| m.into()).collect(); - let decl_ids: Vec = decls.iter().map(|&d| d.into()).collect(); - for (i, &decl_id) in decl_ids.iter().enumerate() { - if i > 0 { - let chained = __yeast_ctx.literal("modifier", "chained_declaration"); - __yeast_ctx.prepend_field(decl_id, "modifier", chained); - } - for &mod_id in mod_ids.iter().rev() { - __yeast_ctx.prepend_field(decl_id, "modifier", mod_id); - } - let binding_mod = __yeast_ctx.literal("modifier", &binding_text); - __yeast_ctx.prepend_field(decl_id, "modifier", binding_mod); + { + let binding_text = ctx.ast.source_text(binding_kind.0); + ctx.binding_modifier = Some(ctx.literal("modifier", &binding_text)); + let mut modifiers = Vec::new(); + for m in mods { + modifiers.extend(ctx.translate(m)?); + } + ctx.outer_modifiers = modifiers; + + let mut result = Vec::new(); + for (i, decl) in decls.into_iter().enumerate() { + ctx.is_chained = i > 0; + result.extend(ctx.translate(decl)?); } - decl_ids - }} + Ok(result) + } ), // ---- Enums ---- // enum_type_parameter → parameter (with optional name as pattern). @@ -217,14 +345,18 @@ fn translation_rules() -> Vec { => (parameter type: {ty}) ), - // enum_case_entry with associated values → class_like_declaration containing - // a constructor whose parameters are the data parameters. + // enum_case_entry with associated values → class_like_declaration + // containing a constructor whose parameters are the data + // parameters. Reads outer modifiers / chained tag from `ctx` + // (set by the outer `enum_entry` rule). 
rule!( (enum_case_entry name: @name data_contents: (enum_type_parameters parameter: _* @params)) => (class_like_declaration + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} modifier: (modifier "enum_case") name: (identifier #{name}) member: (constructor_declaration parameter: {..params} body: (block))) @@ -234,6 +366,8 @@ fn translation_rules() -> Vec { (enum_case_entry name: @name raw_value: @val) => (variable_declaration + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} modifier: (modifier "enum_case") pattern: (name_pattern identifier: (identifier #{name})) value: {val}) @@ -243,28 +377,31 @@ fn translation_rules() -> Vec { (enum_case_entry name: @name) => (variable_declaration + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} modifier: (modifier "enum_case") pattern: (name_pattern identifier: (identifier #{name}))) ), - // enum_entry: flatten case entries; attach outer modifiers to each, and - // chained_declaration on every entry after the first. - rule!( + // enum_entry: flatten case entries; publish outer modifiers + // into `ctx` and translate each case with `ctx.is_chained` + // toggled per iteration so the inner `enum_case_entry` rules + // emit complete `modifier:` lists from the start. 
+ manual_rule!( (enum_entry case: _+ @cases (modifiers)* @mods) - => - {..{ - let mod_ids: Vec = mods.iter().map(|&m| m.into()).collect(); - let case_ids: Vec = cases.iter().map(|&c| c.into()).collect(); - for (i, &case_id) in case_ids.iter().enumerate() { - if i > 0 { - let chained = __yeast_ctx.literal("modifier", "chained_declaration"); - __yeast_ctx.prepend_field(case_id, "modifier", chained); - } - for &mod_id in mod_ids.iter().rev() { - __yeast_ctx.prepend_field(case_id, "modifier", mod_id); - } + { + let mut modifiers = Vec::new(); + for m in mods { + modifiers.extend(ctx.translate(m)?); } - case_ids - }} + ctx.outer_modifiers = modifiers; + + let mut result = Vec::new(); + for (i, case) in cases.into_iter().enumerate() { + ctx.is_chained = i > 0; + result.extend(ctx.translate(case)?); + } + Ok(result) + } ), // Plain assignment: `x = expr` rule!( @@ -336,17 +473,15 @@ fn translation_rules() -> Vec { body: (block stmt: {..body_stmts})) ), // Parameters are wrapped in function_parameter, which also carries - // optional default values. - rule!( + // optional default values. Publishes the default value into `ctx` + // before translating the inner `parameter` so the `parameter` + // rules can include it as a `default:` field directly. + manual_rule!( (function_parameter parameter: @p default_value: _? 
@def) - => - {..{ - let p_id: usize = p.into(); - for &d in def.iter().rev() { - __yeast_ctx.prepend_field(p_id, "default", d.into()); - } - vec![p_id] - }} + { + ctx.default_value = ctx.translate_opt(def)?; + ctx.translate(p) + } ), // Parameter with external name and type rule!( @@ -354,7 +489,8 @@ fn translation_rules() -> Vec { => (parameter external_name: (identifier #{ext}) - pattern: (name_pattern identifier: (identifier #{name}))) + pattern: (name_pattern identifier: (identifier #{name})) + default: {..ctx.default_value}) ), rule!( (parameter external_name: @ext name: @name type: @ty) @@ -362,21 +498,24 @@ fn translation_rules() -> Vec { (parameter external_name: (identifier #{ext}) pattern: (name_pattern identifier: (identifier #{name})) - type: {ty}) + type: {ty} + default: {..ctx.default_value}) ), // Parameter with just name and type (no external name) rule!( (parameter name: @name) => (parameter - pattern: (name_pattern identifier: (identifier #{name}))) + pattern: (name_pattern identifier: (identifier #{name})) + default: {..ctx.default_value}) ), rule!( (parameter name: @name type: @ty) => (parameter pattern: (name_pattern identifier: (identifier #{name})) - type: {ty}) + type: {ty} + default: {..ctx.default_value}) ), // Reference to a function, f(x:y:z:). This is parsed as a call with a single argument with multiple reference_specifier labels. // We don't want downstream QL to try to handle this as a call_expr with a weird argument, so explicitly mark it as unsupported for now. @@ -484,11 +623,12 @@ fn translation_rules() -> Vec { argument: (argument value: {closure})) ), // ---- Control flow ---- + // If statement rule!( (if_statement condition: _* @cond body: @then_body else_branch: _? 
@else_stmts) => (if_expr - condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) + condition: {and_chain(&mut ctx, cond)} then: {then_body} else: {..else_stmts}) ), @@ -497,7 +637,7 @@ fn translation_rules() -> Vec { (guard_statement condition: _* @cond body: (block statement: _* @else_stmts)) => (guard_if_stmt - condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) + condition: {and_chain(&mut ctx, cond)} else: (block stmt: {..else_stmts})) ), // Ternary expression → if_expr @@ -575,20 +715,24 @@ fn translation_rules() -> Vec { rule!( (while_statement condition: _* @cond body: (block statement: _* @body)) => - (while_stmt condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) body: (block stmt: {..body})) + (while_stmt + condition: {and_chain(&mut ctx, cond)} + body: (block stmt: {..body})) ), // Repeat-while loop rule!( (repeat_while_statement condition: _* @cond body: (block statement: _* @body)) => - (do_while_stmt condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) body: (block stmt: {..body})) + (do_while_stmt + condition: {and_chain(&mut ctx, cond)} + body: (block stmt: {..body})) ), // Labeled statement (e.g. `outer: for ...`). Strip the trailing ':' from the label token. 
- rule!((labeled_statement label: (statement_label) @lbl statement: @stmt) => {..{ - let text = __yeast_ctx.ast.source_text(lbl.into()); - let name = __yeast_ctx.literal("identifier", &text[..text.len() - 1]); - vec![__yeast_ctx.node("labeled_stmt", vec![("label", vec![name]), ("stmt", vec![stmt.into()])])] - }}), + rule!((labeled_statement label: (statement_label) @lbl statement: @stmt) => { + let text = ctx.ast.source_text(lbl.into()); + let name = &text[..text.len() - 1]; + tree!((labeled_stmt label: (identifier #{name}) stmt: {stmt})) + }), // ---- Collections ---- // Array literal rule!((array_literal element: _* @elems) => (array_literal element: {..elems})), @@ -598,16 +742,9 @@ fn translation_rules() -> Vec { rule!( (dictionary_literal key: _* @keys value: _* @vals) => - (map_literal element: {..{ - keys.iter().zip(vals.iter()).map(|(&k, &v)| { - let k_id: usize = k.into(); - let v_id: usize = v.into(); - __yeast_ctx.node("key_value_pair", vec![ - ("key", vec![k_id]), - ("value", vec![v_id]), - ]) - }).collect::>() - }}) + (map_literal element: {..keys.into_iter().zip(vals).map(|(k, v)| + tree!((key_value_pair key: {k} value: {v})) + )}) ), rule!((dictionary_literal element: _* @elems) => (map_literal element: {..elems})), rule!((dictionary_literal_item key: @k value: @v) => (key_value_pair key: {k} value: {v})), @@ -669,9 +806,7 @@ fn translation_rules() -> Vec { rule!( (identifier part: _+ @parts) => - {parts}.reduce_left( - first -> (name_expr identifier: (identifier #{first})), - acc, elem -> (member_access_expr base: {acc} member: (identifier #{elem}))) + {member_chain(&mut ctx, parts)} ), // Scoped import declaration (for example `import struct Foo.Bar`): // flatten the identifier parts into a member_access_expr and bind the @@ -874,48 +1009,76 @@ fn translation_rules() -> Vec { name: (identifier #{name}) bound: {..bound}) ), - // Protocol property declaration: translate each accessor requirement to an - // accessor_declaration without a body, 
carrying the property name and type. - // Subsequent accessors get chained_declaration (same flattening as computed properties). - rule!( + // Protocol property declaration: translate each accessor + // requirement to an `accessor_declaration` carrying the property + // name, type, and outer modifiers. Manual rule: we publish the + // property's name/type/modifiers into `ctx` and translate each + // accessor with `ctx.is_chained` toggled per iteration so the + // inner `getter_specifier`/`setter_specifier` rules emit + // complete nodes from the start (including the + // `chained_declaration` tag for non-first accessors). + manual_rule!( (protocol_property_declaration - name: @pattern + name: (pattern bound_identifier: @name) requirements: (protocol_property_requirements accessor: _+ @accessors) type: _? @ty (modifiers)* @mods) - => - {..{ - let name_text = __yeast_ctx.ast.source_text(pattern.into()); - let mod_ids: Vec = mods.iter().map(|&m| m.into()).collect(); - let ty_ids: Vec = ty.iter().map(|&t| t.into()).collect(); - let acc_ids: Vec = accessors.iter().map(|&a| a.into()).collect(); - for (i, &acc_id) in acc_ids.iter().enumerate() { - if i > 0 { - let chained = __yeast_ctx.literal("modifier", "chained_declaration"); - __yeast_ctx.prepend_field(acc_id, "modifier", chained); - } - for &mod_id in mod_ids.iter().rev() { - __yeast_ctx.prepend_field(acc_id, "modifier", mod_id); - } - for &ty_id in ty_ids.iter().rev() { - __yeast_ctx.prepend_field(acc_id, "type", ty_id); - } - let ident = __yeast_ctx.literal("identifier", &name_text); - __yeast_ctx.prepend_field(acc_id, "name", ident); + { + ctx.property_name = Some(tree!((identifier #{name}))); + ctx.property_type = ctx.translate_opt(ty)?; + let mut modifiers = Vec::new(); + for m in mods { + modifiers.extend(ctx.translate(m)?); } - acc_ids - }} + ctx.outer_modifiers = modifiers; + + let mut result = Vec::new(); + for (i, acc) in accessors.into_iter().enumerate() { + ctx.is_chained = i > 0; + 
result.extend(ctx.translate(acc)?); + } + Ok(result) + } ), // getter_specifier / setter_specifier → bodyless accessor_declaration - rule!((getter_specifier) => (accessor_declaration accessor_kind: (accessor_kind "get"))), - rule!((setter_specifier) => (accessor_declaration accessor_kind: (accessor_kind "set"))), + // getter_specifier / setter_specifier → bodyless + // accessor_declaration. Reads property name/type/modifiers from + // `ctx` set by the outer `protocol_property_declaration` rule. + rule!( + (getter_specifier) + => + (accessor_declaration + name: {ctx.property_name.ok_or("getter_specifier outside protocol_property_declaration context")?} + type: {..ctx.property_type} + accessor_kind: (accessor_kind "get") + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)}) + ), + rule!( + (setter_specifier) + => + (accessor_declaration + name: {ctx.property_name.ok_or("setter_specifier outside protocol_property_declaration context")?} + type: {..ctx.property_type} + accessor_kind: (accessor_kind "set") + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)}) + ), // protocol_property_requirements wrapper — should be consumed by above; fallback rule!((protocol_property_requirements accessor: _* @accs) => {..accs}), // Computed getter → accessor_declaration (body optional). + // Reads property name/type from the outer property_binding rule + // and binding/outer modifiers + chained tag from the outer + // property_declaration rule. rule!( (computed_getter body: (block statement: _* @body)?) 
=> (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} + name: {ctx.property_name.ok_or("computed_getter outside property_binding context")?} + type: {..ctx.property_type} accessor_kind: (accessor_kind "get") body: (block stmt: {..body})) ), @@ -924,6 +1087,11 @@ fn translation_rules() -> Vec { (computed_setter parameter: @param body: (block statement: _* @body)) => (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} + name: {ctx.property_name.ok_or("computed_setter outside property_binding context")?} + type: {..ctx.property_type} accessor_kind: (accessor_kind "set") parameter: (parameter pattern: (name_pattern identifier: (identifier #{param}))) body: (block stmt: {..body})) @@ -933,6 +1101,11 @@ fn translation_rules() -> Vec { (computed_setter body: (block statement: _* @body)?) => (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} + name: {ctx.property_name.ok_or("computed_setter outside property_binding context")?} + type: {..ctx.property_type} accessor_kind: (accessor_kind "set") body: (block stmt: {..body})) ), @@ -941,16 +1114,30 @@ fn translation_rules() -> Vec { (computed_modify body: (block statement: _* @body)) => (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} + name: {ctx.property_name.ok_or("computed_modify outside property_binding context")?} + type: {..ctx.property_type} accessor_kind: (accessor_kind "modify") body: (block stmt: {..body})) ), - // willset/didset block — spread to children + // willset/didset block — spread to children (only reachable as a + // fallback; the outer property_binding manual rule normally + // captures the willset/didset clauses directly). 
rule!((willset_didset_block _* @clauses) => {..clauses}), - // willset clause → accessor_declaration (body optional). + // willset clause → accessor_declaration (body optional). Reads + // `ctx.property_name` set by the outer property_binding rule and + // binding/outer modifiers + chained tag from the outer + // property_declaration rule. rule!( (willset_clause body: (block statement: _* @body)?) => (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} + name: {ctx.property_name.ok_or("willset_clause outside property_binding context")?} accessor_kind: (accessor_kind "willSet") body: (block stmt: {..body})) ), @@ -959,6 +1146,10 @@ fn translation_rules() -> Vec { (didset_clause body: (block statement: _* @body)?) => (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} + name: {ctx.property_name.ok_or("didset_clause outside property_binding context")?} accessor_kind: (accessor_kind "didSet") body: (block stmt: {..body})) ), @@ -979,14 +1170,17 @@ fn translation_rules() -> Vec { } pub fn language_spec(desugared_ast_schema: &'static str) -> simple::LanguageSpec { - let desugar = DesugaringConfig::new() + let ts_language: tree_sitter::Language = tree_sitter_swift::LANGUAGE.into(); + let config = DesugaringConfig::::new() .add_phase("translate", PhaseKind::OneShot, translation_rules()) .with_output_node_types_yaml(desugared_ast_schema); + let desugarer = ConcreteDesugarer::new(ts_language.clone(), config) + .expect("failed to build Swift desugarer"); simple::LanguageSpec { prefix: "swift", - ts_language: tree_sitter_swift::LANGUAGE.into(), + ts_language, node_types: tree_sitter_swift::NODE_TYPES, file_globs: vec!["*.swift".into(), "*.swiftinterface".into()], - desugar: Some(desugar), + desugar: Some(Box::new(desugarer)), } } diff --git a/unified/extractor/tests/corpus/swift/types.txt 
b/unified/extractor/tests/corpus/swift/types.txt index ef15ad87f594..9c22ae74798b 100644 --- a/unified/extractor/tests/corpus/swift/types.txt +++ b/unified/extractor/tests/corpus/swift/types.txt @@ -924,3 +924,159 @@ top_level accessor_kind: accessor_kind "set" modifier: modifier "class" name: identifier "Box" + +=== +Protocol with read-only and read-write property requirements +=== + +protocol P { + var foo: Int { get } + var bar: String { get set } +} + +--- + +source_file + statement: + protocol_declaration + body: + protocol_body + member: + protocol_property_declaration + name: + pattern + binding: + value_binding_pattern + mutability: var + bound_identifier: simple_identifier "foo" + requirements: + protocol_property_requirements + accessor: + getter_specifier + type: + type_annotation + type: + type + name: + user_type + part: + simple_user_type + name: type_identifier "Int" + protocol_property_declaration + name: + pattern + binding: + value_binding_pattern + mutability: var + bound_identifier: simple_identifier "bar" + requirements: + protocol_property_requirements + accessor: + getter_specifier + setter_specifier + type: + type_annotation + type: + type + name: + user_type + part: + simple_user_type + name: type_identifier "String" + name: type_identifier "P" + +--- + +top_level + body: + block + stmt: + class_like_declaration + member: + accessor_declaration + name: identifier "foo" + type: + named_type_expr + name: identifier "Int" + accessor_kind: accessor_kind "get" + accessor_declaration + name: identifier "bar" + type: + named_type_expr + name: identifier "String" + accessor_kind: accessor_kind "get" + accessor_declaration + modifier: modifier "chained_declaration" + name: identifier "bar" + type: + named_type_expr + name: identifier "String" + accessor_kind: accessor_kind "set" + modifier: modifier "protocol" + name: identifier "P" + +=== +Enum with comma-separated cases (chained_declaration) +=== + +enum Suit { + case clubs, diamonds, hearts, 
spades +} + +--- + +source_file + statement: + class_declaration + body: + enum_class_body + member: + enum_entry + case: + enum_case_entry + name: simple_identifier "clubs" + enum_case_entry + name: simple_identifier "diamonds" + enum_case_entry + name: simple_identifier "hearts" + enum_case_entry + name: simple_identifier "spades" + declaration_kind: enum + name: type_identifier "Suit" + +--- + +top_level + body: + block + stmt: + class_like_declaration + member: + variable_declaration + modifier: modifier "enum_case" + pattern: + name_pattern + identifier: identifier "clubs" + variable_declaration + modifier: + modifier "chained_declaration" + modifier "enum_case" + pattern: + name_pattern + identifier: identifier "diamonds" + variable_declaration + modifier: + modifier "chained_declaration" + modifier "enum_case" + pattern: + name_pattern + identifier: identifier "hearts" + variable_declaration + modifier: + modifier "chained_declaration" + modifier "enum_case" + pattern: + name_pattern + identifier: identifier "spades" + modifier: modifier "enum" + name: identifier "Suit" diff --git a/unified/extractor/tests/corpus/swift/variables.txt b/unified/extractor/tests/corpus/swift/variables.txt index f1da058eef2e..78b80d9a5098 100644 --- a/unified/extractor/tests/corpus/swift/variables.txt +++ b/unified/extractor/tests/corpus/swift/variables.txt @@ -319,3 +319,130 @@ top_level name_expr identifier: identifier "x" value: int_literal "1" + +=== +Property with willSet and didSet observers +=== + +class C { + var x: Int = 0 { + willSet { print(newValue) } + didSet { print(oldValue) } + } +} + +--- + +source_file + statement: + class_declaration + body: + class_body + member: + property_declaration + binding: + value_binding_pattern + mutability: var + declarator: + property_binding + name: + pattern + bound_identifier: simple_identifier "x" + observers: + willset_didset_block + didset: + didset_clause + body: + block + statement: + call_expression + function: 
simple_identifier "print" + suffix: + call_suffix + arguments: + value_arguments + argument: + value_argument + value: simple_identifier "oldValue" + willset: + willset_clause + body: + block + statement: + call_expression + function: simple_identifier "print" + suffix: + call_suffix + arguments: + value_arguments + argument: + value_argument + value: simple_identifier "newValue" + type: + type_annotation + type: + type + name: + user_type + part: + simple_user_type + name: type_identifier "Int" + value: integer_literal "0" + declaration_kind: class + name: type_identifier "C" + +--- + +top_level + body: + block + stmt: + class_like_declaration + member: + variable_declaration + modifier: modifier "var" + pattern: + name_pattern + identifier: identifier "x" + type: + named_type_expr + name: identifier "Int" + value: int_literal "0" + accessor_declaration + body: + block + stmt: + call_expr + argument: + argument + value: + name_expr + identifier: identifier "newValue" + callee: + name_expr + identifier: identifier "print" + modifier: + modifier "var" + modifier "chained_declaration" + name: identifier "x" + accessor_kind: accessor_kind "willSet" + accessor_declaration + body: + block + stmt: + call_expr + argument: + argument + value: + name_expr + identifier: identifier "oldValue" + callee: + name_expr + identifier: identifier "print" + modifier: + modifier "var" + modifier "chained_declaration" + name: identifier "x" + accessor_kind: accessor_kind "didSet" + modifier: modifier "class" + name: identifier "C" diff --git a/unified/extractor/tests/corpus_tests.rs b/unified/extractor/tests/corpus_tests.rs index 0f1057a8e5b9..6c859c2f6cf0 100644 --- a/unified/extractor/tests/corpus_tests.rs +++ b/unified/extractor/tests/corpus_tests.rs @@ -2,7 +2,7 @@ use std::fs; use std::path::Path; use codeql_extractor::extractor::simple; -use yeast::{dump::dump_ast, dump::dump_ast_with_type_errors, Runner}; +use yeast::{Runner, dump::dump_ast, dump::dump_ast_with_type_errors}; 
#[path = "../src/languages/mod.rs"] mod languages; @@ -146,29 +146,36 @@ fn render_corpus(cases: &[CorpusCase]) -> String { out } -fn run_desugaring( - lang: &simple::LanguageSpec, - input: &str, -) -> Result { - let runner = match lang.desugar.as_ref() { - Some(config) => Runner::from_config(lang.ts_language.clone(), config) - .map_err(|e| format!("Failed to create yeast runner: {e}"))?, - None => Runner::new(lang.ts_language.clone(), &[]), - }; - - runner - .run(input) - .map_err(|e| format!("Failed to parse input: {e}")) +fn run_desugaring(lang: &simple::LanguageSpec, input: &str) -> Result { + match lang.desugar.as_deref() { + Some(desugarer) => { + // Parse the input ourselves so we don't depend on the desugarer + // knowing about the language. + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&lang.ts_language) + .map_err(|e| format!("Failed to set language: {e}"))?; + let tree = parser + .parse(input, None) + .ok_or_else(|| "Failed to parse input".to_string())?; + desugarer + .run_from_tree(&tree, input.as_bytes()) + .map_err(|e| format!("Desugaring failed: {e}")) + } + None => { + let runner: Runner = Runner::new(lang.ts_language.clone(), &[]); + runner + .run(input) + .map_err(|e| format!("Failed to parse input: {e}")) + } + } } /// Produce the raw tree-sitter parse tree dump for `input`, with no /// desugaring rules applied. Uses a `Runner` with an empty phase list and /// the input grammar's own schema. 
-fn dump_raw_parse( - lang: &simple::LanguageSpec, - input: &str, -) -> Result { - let runner = Runner::new(lang.ts_language.clone(), &[]); +fn dump_raw_parse(lang: &simple::LanguageSpec, input: &str) -> Result { + let runner: Runner = Runner::new(lang.ts_language.clone(), &[]); let ast = runner .run(input) .map_err(|e| format!("Failed to parse input: {e}"))?; @@ -272,11 +279,7 @@ fn test_corpus() { } } - assert!( - failures.is_empty(), - "{}", - failures.join("\n\n") + "\n\n" - ); + assert!(failures.is_empty(), "{}", failures.join("\n\n") + "\n\n"); if update_mode { let updated = render_corpus(&cases); @@ -285,7 +288,9 @@ fn test_corpus() { write_result.is_ok(), "Failed to update corpus file {}: {}", corpus_path.display(), - write_result.err().map_or_else(String::new, |e| e.to_string()) + write_result + .err() + .map_or_else(String::new, |e| e.to_string()) ); } }