| author | Aleksey Kladov <aleksey.kladov@gmail.com> | 2021-07-03 22:11:03 +0300 |
|---|---|---|
| committer | Aleksey Kladov <aleksey.kladov@gmail.com> | 2021-07-03 22:11:03 +0300 |
| commit | 58d2ece88a17030668e09e4aade7bb2ed27dcaac (patch) | |
| tree | 38ffd8178f472d0154c02ed111b4a980fe087edc /crates/syntax/src/tests | |
| parent | 668d0612452913624ef8aa4f17d7fef9ac08a75f (diff) | |
| download | rust-58d2ece88a17030668e09e4aade7bb2ed27dcaac.tar.gz rust-58d2ece88a17030668e09e4aade7bb2ed27dcaac.zip | |
internal: overhaul code generation
* Keep codegen adjacent to the relevant crates.
* Remove codegen deps from xtask, speeding up from-source installation.

This regresses the release process a bit, as it now needs to run the tests (and, by extension, compile the code).
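The mechanism that makes this work is that code generation now runs as ordinary `#[test]`s (`sourcegen_ast`, `sourcegen_parser_tests` below), which compare freshly generated text against the files checked into the tree. A minimal sketch of what `sourcegen::ensure_file_contents` is assumed to do (the real helper lives in the `sourcegen` crate and differs in details):

```rust
use std::{fs, path::Path};

// Assumed behavior of `sourcegen::ensure_file_contents`: a self-updating
// test helper. This is a sketch, not the actual implementation.
fn ensure_file_contents(file: &Path, contents: &str) {
    // If the checked-in file already matches the generated text, the test
    // passes and nothing is touched.
    if let Ok(old) = fs::read_to_string(file) {
        if old == contents {
            return;
        }
    }
    // Otherwise, write the regenerated file and fail the test, so the stale
    // file shows up in `git status` and the change gets committed.
    fs::write(file, contents).unwrap();
    panic!("{} was stale and has been regenerated; please commit it", file.display());
}
```

This is also why the release process now has to run the tests: regenerating the committed files is a side effect of the test suite.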
Diffstat (limited to 'crates/syntax/src/tests')
| -rw-r--r-- | crates/syntax/src/tests/ast_src.rs | 258 |
| -rw-r--r-- | crates/syntax/src/tests/sourcegen_ast.rs | 747 |
| -rw-r--r-- | crates/syntax/src/tests/sourcegen_tests.rs | 124 |
3 files changed, 1129 insertions, 0 deletions
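For orientation before the diff itself: `generate_nodes` in `sourcegen_ast.rs` emits, for every grammar node, a newtype wrapper around `SyntaxNode` plus typed accessors. Pieced together from the `quote!` templates in this diff, the output has roughly this shape (an illustrative excerpt, not actual generated code; `ParenExpr` is just one example node):

```rust
// Illustrative shape of one generated wrapper in
// crates/syntax/src/ast/generated/nodes.rs (reconstructed from the
// templates below, not copied from real output).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ParenExpr {
    pub(crate) syntax: SyntaxNode,
}

impl ParenExpr {
    pub fn l_paren_token(&self) -> Option<SyntaxToken> {
        support::token(&self.syntax, T!['('])
    }
    pub fn expr(&self) -> Option<Expr> {
        support::child(&self.syntax)
    }
    pub fn r_paren_token(&self) -> Option<SyntaxToken> {
        support::token(&self.syntax, T![')'])
    }
}

impl AstNode for ParenExpr {
    fn can_cast(kind: SyntaxKind) -> bool { kind == PAREN_EXPR }
    fn cast(syntax: SyntaxNode) -> Option<Self> {
        if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
    }
    fn syntax(&self) -> &SyntaxNode { &self.syntax }
}
```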
diff --git a/crates/syntax/src/tests/ast_src.rs b/crates/syntax/src/tests/ast_src.rs
new file mode 100644
index 00000000000..fe37d024518
--- /dev/null
+++ b/crates/syntax/src/tests/ast_src.rs
@@ -0,0 +1,258 @@
+//! Defines input for code generation process.
+
+pub(crate) struct KindsSrc<'a> {
+    pub(crate) punct: &'a [(&'a str, &'a str)],
+    pub(crate) keywords: &'a [&'a str],
+    pub(crate) contextual_keywords: &'a [&'a str],
+    pub(crate) literals: &'a [&'a str],
+    pub(crate) tokens: &'a [&'a str],
+    pub(crate) nodes: &'a [&'a str],
+}
+
+pub(crate) const KINDS_SRC: KindsSrc = KindsSrc {
+    punct: &[
+        (";", "SEMICOLON"),
+        (",", "COMMA"),
+        ("(", "L_PAREN"),
+        (")", "R_PAREN"),
+        ("{", "L_CURLY"),
+        ("}", "R_CURLY"),
+        ("[", "L_BRACK"),
+        ("]", "R_BRACK"),
+        ("<", "L_ANGLE"),
+        (">", "R_ANGLE"),
+        ("@", "AT"),
+        ("#", "POUND"),
+        ("~", "TILDE"),
+        ("?", "QUESTION"),
+        ("$", "DOLLAR"),
+        ("&", "AMP"),
+        ("|", "PIPE"),
+        ("+", "PLUS"),
+        ("*", "STAR"),
+        ("/", "SLASH"),
+        ("^", "CARET"),
+        ("%", "PERCENT"),
+        ("_", "UNDERSCORE"),
+        (".", "DOT"),
+        ("..", "DOT2"),
+        ("...", "DOT3"),
+        ("..=", "DOT2EQ"),
+        (":", "COLON"),
+        ("::", "COLON2"),
+        ("=", "EQ"),
+        ("==", "EQ2"),
+        ("=>", "FAT_ARROW"),
+        ("!", "BANG"),
+        ("!=", "NEQ"),
+        ("-", "MINUS"),
+        ("->", "THIN_ARROW"),
+        ("<=", "LTEQ"),
+        (">=", "GTEQ"),
+        ("+=", "PLUSEQ"),
+        ("-=", "MINUSEQ"),
+        ("|=", "PIPEEQ"),
+        ("&=", "AMPEQ"),
+        ("^=", "CARETEQ"),
+        ("/=", "SLASHEQ"),
+        ("*=", "STAREQ"),
+        ("%=", "PERCENTEQ"),
+        ("&&", "AMP2"),
+        ("||", "PIPE2"),
+        ("<<", "SHL"),
+        (">>", "SHR"),
+        ("<<=", "SHLEQ"),
+        (">>=", "SHREQ"),
+    ],
+    keywords: &[
+        "as", "async", "await", "box", "break", "const", "continue", "crate", "dyn", "else",
+        "enum", "extern", "false", "fn", "for", "if", "impl", "in", "let", "loop", "macro",
+        "match", "mod", "move", "mut", "pub", "ref", "return", "self", "static", "struct", "super",
+        "trait", "true", "try", "type", "unsafe", "use", "where", "while", "yield",
+    ],
+    contextual_keywords: &["auto", "default", "existential", "union", "raw", "macro_rules"],
+    literals: &["INT_NUMBER", "FLOAT_NUMBER", "CHAR", "BYTE", "STRING", "BYTE_STRING"],
+    tokens: &[
+        "ERROR",
+        "IDENT",
+        "WHITESPACE",
+        "LIFETIME_IDENT",
+        "COMMENT",
+        "SHEBANG",
+        "L_DOLLAR",
+        "R_DOLLAR",
+    ],
+    nodes: &[
+        "SOURCE_FILE",
+        "STRUCT",
+        "UNION",
+        "ENUM",
+        "FN",
+        "RET_TYPE",
+        "EXTERN_CRATE",
+        "MODULE",
+        "USE",
+        "STATIC",
+        "CONST",
+        "TRAIT",
+        "IMPL",
+        "TYPE_ALIAS",
+        "MACRO_CALL",
+        "MACRO_RULES",
+        "MACRO_ARM",
+        "TOKEN_TREE",
+        "MACRO_DEF",
+        "PAREN_TYPE",
+        "TUPLE_TYPE",
+        "MACRO_TYPE",
+        "NEVER_TYPE",
+        "PATH_TYPE",
+        "PTR_TYPE",
+        "ARRAY_TYPE",
+        "SLICE_TYPE",
+        "REF_TYPE",
+        "INFER_TYPE",
+        "FN_PTR_TYPE",
+        "FOR_TYPE",
+        "IMPL_TRAIT_TYPE",
+        "DYN_TRAIT_TYPE",
+        "OR_PAT",
+        "PAREN_PAT",
+        "REF_PAT",
+        "BOX_PAT",
+        "IDENT_PAT",
+        "WILDCARD_PAT",
+        "REST_PAT",
+        "PATH_PAT",
+        "RECORD_PAT",
+        "RECORD_PAT_FIELD_LIST",
+        "RECORD_PAT_FIELD",
+        "TUPLE_STRUCT_PAT",
+        "TUPLE_PAT",
+        "SLICE_PAT",
+        "RANGE_PAT",
+        "LITERAL_PAT",
+        "MACRO_PAT",
+        "CONST_BLOCK_PAT",
+        // atoms
+        "TUPLE_EXPR",
+        "ARRAY_EXPR",
+        "PAREN_EXPR",
+        "PATH_EXPR",
+        "CLOSURE_EXPR",
+        "IF_EXPR",
+        "WHILE_EXPR",
+        "CONDITION",
+        "LOOP_EXPR",
+        "FOR_EXPR",
+        "CONTINUE_EXPR",
+        "BREAK_EXPR",
+        "LABEL",
+        "BLOCK_EXPR",
+        "RETURN_EXPR",
+        "YIELD_EXPR",
+        "MATCH_EXPR",
+        "MATCH_ARM_LIST",
+        "MATCH_ARM",
+        "MATCH_GUARD",
+        "RECORD_EXPR",
+        "RECORD_EXPR_FIELD_LIST",
+        "RECORD_EXPR_FIELD",
+        "EFFECT_EXPR",
+        "BOX_EXPR",
+        // postfix
+        "CALL_EXPR",
+        "INDEX_EXPR",
+        "METHOD_CALL_EXPR",
+        "FIELD_EXPR",
+        "AWAIT_EXPR",
+        "TRY_EXPR",
+        "CAST_EXPR",
+        // unary
+        "REF_EXPR",
+        "PREFIX_EXPR",
+        "RANGE_EXPR", // just weird
+        "BIN_EXPR",
+        "EXTERN_BLOCK",
+        "EXTERN_ITEM_LIST",
+        "VARIANT",
+        "RECORD_FIELD_LIST",
+        "RECORD_FIELD",
+        "TUPLE_FIELD_LIST",
+        "TUPLE_FIELD",
+        "VARIANT_LIST",
+        "ITEM_LIST",
+        "ASSOC_ITEM_LIST",
+        "ATTR",
+        "META",
+        "USE_TREE",
+        "USE_TREE_LIST",
+        "PATH",
+        "PATH_SEGMENT",
+        "LITERAL",
+        "RENAME",
+        "VISIBILITY",
+        "WHERE_CLAUSE",
+        "WHERE_PRED",
+        "ABI",
+        "NAME",
+        "NAME_REF",
+        "LET_STMT",
+        "EXPR_STMT",
+        "GENERIC_PARAM_LIST",
+        "GENERIC_PARAM",
+        "LIFETIME_PARAM",
+        "TYPE_PARAM",
+        "CONST_PARAM",
+        "GENERIC_ARG_LIST",
+        "LIFETIME",
+        "LIFETIME_ARG",
+        "TYPE_ARG",
+        "ASSOC_TYPE_ARG",
+        "CONST_ARG",
+        "PARAM_LIST",
+        "PARAM",
+        "SELF_PARAM",
+        "ARG_LIST",
+        "TYPE_BOUND",
+        "TYPE_BOUND_LIST",
+        // macro related
+        "MACRO_ITEMS",
+        "MACRO_STMTS",
+    ],
+};
+
+#[derive(Default, Debug)]
+pub(crate) struct AstSrc {
+    pub(crate) tokens: Vec<String>,
+    pub(crate) nodes: Vec<AstNodeSrc>,
+    pub(crate) enums: Vec<AstEnumSrc>,
+}
+
+#[derive(Debug)]
+pub(crate) struct AstNodeSrc {
+    pub(crate) doc: Vec<String>,
+    pub(crate) name: String,
+    pub(crate) traits: Vec<String>,
+    pub(crate) fields: Vec<Field>,
+}
+
+#[derive(Debug, Eq, PartialEq)]
+pub(crate) enum Field {
+    Token(String),
+    Node { name: String, ty: String, cardinality: Cardinality },
+}
+
+#[derive(Debug, Eq, PartialEq)]
+pub(crate) enum Cardinality {
+    Optional,
+    Many,
+}
+
+#[derive(Debug)]
+pub(crate) struct AstEnumSrc {
+    pub(crate) doc: Vec<String>,
+    pub(crate) name: String,
+    pub(crate) traits: Vec<String>,
+    pub(crate) variants: Vec<String>,
+}
diff --git a/crates/syntax/src/tests/sourcegen_ast.rs b/crates/syntax/src/tests/sourcegen_ast.rs
new file mode 100644
index 00000000000..d2bafb3a7b8
--- /dev/null
+++ b/crates/syntax/src/tests/sourcegen_ast.rs
@@ -0,0 +1,747 @@
+//! This module generates AST datatype used by rust-analyzer.
+//!
+//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
+//! wrappers around `SyntaxNode` which implement `syntax::AstNode`.
+
+use std::{
+    collections::{BTreeSet, HashSet},
+    fmt::Write,
+};
+
+use proc_macro2::{Punct, Spacing};
+use quote::{format_ident, quote};
+use ungrammar::{rust_grammar, Grammar, Rule};
+
+use crate::tests::ast_src::{
+    AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC,
+};
+
+#[test]
+fn sourcegen_ast() {
+    let grammar = rust_grammar();
+    let ast = lower(&grammar);
+
+    let syntax_kinds_file =
+        sourcegen::project_root().join("crates/parser/src/syntax_kind/generated.rs");
+    let syntax_kinds = generate_syntax_kinds(KINDS_SRC);
+    sourcegen::ensure_file_contents(syntax_kinds_file.as_path(), &syntax_kinds);
+
+    let ast_tokens_file =
+        sourcegen::project_root().join("crates/syntax/src/ast/generated/tokens.rs");
+    let contents = generate_tokens(&ast);
+    sourcegen::ensure_file_contents(ast_tokens_file.as_path(), &contents);
+
+    let ast_nodes_file = sourcegen::project_root().join("crates/syntax/src/ast/generated/nodes.rs");
+    let contents = generate_nodes(KINDS_SRC, &ast);
+    sourcegen::ensure_file_contents(ast_nodes_file.as_path(), &contents);
+}
+
+fn generate_tokens(grammar: &AstSrc) -> String {
+    let tokens = grammar.tokens.iter().map(|token| {
+        let name = format_ident!("{}", token);
+        let kind = format_ident!("{}", to_upper_snake_case(token));
+        quote! {
+            #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+            pub struct #name {
+                pub(crate) syntax: SyntaxToken,
+            }
+            impl std::fmt::Display for #name {
+                fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                    std::fmt::Display::fmt(&self.syntax, f)
+                }
+            }
+            impl AstToken for #name {
+                fn can_cast(kind: SyntaxKind) -> bool { kind == #kind }
+                fn cast(syntax: SyntaxToken) -> Option<Self> {
+                    if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
+                }
+                fn syntax(&self) -> &SyntaxToken { &self.syntax }
+            }
+        }
+    });
+
+    sourcegen::add_preamble(
+        "sourcegen_ast",
+        sourcegen::reformat(
+            quote! {
+                use crate::{SyntaxKind::{self, *}, SyntaxToken, ast::AstToken};
+                #(#tokens)*
+            }
+            .to_string(),
+        ),
+    )
+    .replace("#[derive", "\n#[derive")
+}
+
+fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String {
+    let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
+        .nodes
+        .iter()
+        .map(|node| {
+            let name = format_ident!("{}", node.name);
+            let kind = format_ident!("{}", to_upper_snake_case(&node.name));
+            let traits = node.traits.iter().map(|trait_name| {
+                let trait_name = format_ident!("{}", trait_name);
+                quote!(impl ast::#trait_name for #name {})
+            });
+
+            let methods = node.fields.iter().map(|field| {
+                let method_name = field.method_name();
+                let ty = field.ty();
+
+                if field.is_many() {
+                    quote! {
+                        pub fn #method_name(&self) -> AstChildren<#ty> {
+                            support::children(&self.syntax)
+                        }
+                    }
+                } else if let Some(token_kind) = field.token_kind() {
+                    quote! {
+                        pub fn #method_name(&self) -> Option<#ty> {
+                            support::token(&self.syntax, #token_kind)
+                        }
+                    }
+                } else {
+                    quote! {
+                        pub fn #method_name(&self) -> Option<#ty> {
+                            support::child(&self.syntax)
+                        }
+                    }
+                }
+            });
+            (
+                quote! {
+                    #[pretty_doc_comment_placeholder_workaround]
+                    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+                    pub struct #name {
+                        pub(crate) syntax: SyntaxNode,
+                    }
+
+                    #(#traits)*
+
+                    impl #name {
+                        #(#methods)*
+                    }
+                },
+                quote! {
+                    impl AstNode for #name {
+                        fn can_cast(kind: SyntaxKind) -> bool {
+                            kind == #kind
+                        }
+                        fn cast(syntax: SyntaxNode) -> Option<Self> {
+                            if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
+                        }
+                        fn syntax(&self) -> &SyntaxNode { &self.syntax }
+                    }
+                },
+            )
+        })
+        .unzip();
+
+    let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
+        .enums
+        .iter()
+        .map(|en| {
+            let variants: Vec<_> = en.variants.iter().map(|var| format_ident!("{}", var)).collect();
+            let name = format_ident!("{}", en.name);
+            let kinds: Vec<_> = variants
+                .iter()
+                .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
+                .collect();
+            let traits = en.traits.iter().map(|trait_name| {
+                let trait_name = format_ident!("{}", trait_name);
+                quote!(impl ast::#trait_name for #name {})
+            });
+
+            let ast_node = if en.name == "Stmt" {
+                quote! {}
+            } else {
+                quote! {
+                    impl AstNode for #name {
+                        fn can_cast(kind: SyntaxKind) -> bool {
+                            match kind {
+                                #(#kinds)|* => true,
+                                _ => false,
+                            }
+                        }
+                        fn cast(syntax: SyntaxNode) -> Option<Self> {
+                            let res = match syntax.kind() {
+                                #(
+                                    #kinds => #name::#variants(#variants { syntax }),
+                                )*
+                                _ => return None,
+                            };
+                            Some(res)
+                        }
+                        fn syntax(&self) -> &SyntaxNode {
+                            match self {
+                                #(
+                                    #name::#variants(it) => &it.syntax,
+                                )*
+                            }
+                        }
+                    }
+                }
+            };
+
+            (
+                quote! {
+                    #[pretty_doc_comment_placeholder_workaround]
+                    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+                    pub enum #name {
+                        #(#variants(#variants),)*
+                    }
+
+                    #(#traits)*
+                },
+                quote! {
+                    #(
+                        impl From<#variants> for #name {
+                            fn from(node: #variants) -> #name {
+                                #name::#variants(node)
+                            }
+                        }
+                    )*
+                    #ast_node
+                },
+            )
+        })
+        .unzip();
+
+    let enum_names = grammar.enums.iter().map(|it| &it.name);
+    let node_names = grammar.nodes.iter().map(|it| &it.name);
+
+    let display_impls =
+        enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| {
+            quote! {
+                impl std::fmt::Display for #name {
+                    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                        std::fmt::Display::fmt(self.syntax(), f)
+                    }
+                }
+            }
+        });
+
+    let defined_nodes: HashSet<_> = node_names.collect();
+
+    for node in kinds
+        .nodes
+        .iter()
+        .map(|kind| to_pascal_case(kind))
+        .filter(|name| !defined_nodes.iter().any(|&it| it == name))
+    {
+        drop(node)
+        // FIXME: restore this
+        // eprintln!("Warning: node {} not defined in ast source", node);
+    }
+
+    let ast = quote! {
+        use crate::{
+            SyntaxNode, SyntaxToken, SyntaxKind::{self, *},
+            ast::{self, AstNode, AstChildren, support},
+            T,
+        };
+
+        #(#node_defs)*
+        #(#enum_defs)*
+        #(#node_boilerplate_impls)*
+        #(#enum_boilerplate_impls)*
+        #(#display_impls)*
+    };
+
+    let ast = ast.to_string().replace("T ! [", "T![");
+
+    let mut res = String::with_capacity(ast.len() * 2);
+
+    let mut docs =
+        grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc));
+
+    for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {
+        res.push_str(chunk);
+        if let Some(doc) = docs.next() {
+            write_doc_comment(doc, &mut res);
+        }
+    }
+
+    sourcegen::add_preamble("sourcegen_ast", sourcegen::reformat(res))
+}
+
+fn write_doc_comment(contents: &[String], dest: &mut String) {
+    for line in contents {
+        writeln!(dest, "///{}", line).unwrap();
+    }
+}
+
+fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> String {
+    let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
+        .punct
+        .iter()
+        .filter(|(token, _name)| token.len() == 1)
+        .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
+        .unzip();
+
+    let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
+        if "{}[]()".contains(token) {
+            let c = token.chars().next().unwrap();
+            quote! { #c }
+        } else {
+            let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
+            quote! { #(#cs)* }
+        }
+    });
+    let punctuation =
+        grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
+
+    let full_keywords_values = &grammar.keywords;
+    let full_keywords =
+        full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(kw)));
+
+    let all_keywords_values =
+        grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::<Vec<_>>();
+    let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw));
+    let all_keywords = all_keywords_values
+        .iter()
+        .map(|name| format_ident!("{}_KW", to_upper_snake_case(name)))
+        .collect::<Vec<_>>();
+
+    let literals =
+        grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
+
+    let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
+
+    let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
+
+    let ast = quote! {
+        #![allow(bad_style, missing_docs, unreachable_pub)]
+        /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.
+        #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
+        #[repr(u16)]
+        pub enum SyntaxKind {
+            // Technical SyntaxKinds: they appear temporally during parsing,
+            // but never end up in the final tree
+            #[doc(hidden)]
+            TOMBSTONE,
+            #[doc(hidden)]
+            EOF,
+            #(#punctuation,)*
+            #(#all_keywords,)*
+            #(#literals,)*
+            #(#tokens,)*
+            #(#nodes,)*
+
+            // Technical kind so that we can cast from u16 safely
+            #[doc(hidden)]
+            __LAST,
+        }
+        use self::SyntaxKind::*;
+
+        impl SyntaxKind {
+            pub fn is_keyword(self) -> bool {
+                match self {
+                    #(#all_keywords)|* => true,
+                    _ => false,
+                }
+            }
+
+            pub fn is_punct(self) -> bool {
+                match self {
+                    #(#punctuation)|* => true,
+                    _ => false,
+                }
+            }
+
+            pub fn is_literal(self) -> bool {
+                match self {
+                    #(#literals)|* => true,
+                    _ => false,
+                }
+            }
+
+            pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
+                let kw = match ident {
+                    #(#full_keywords_values => #full_keywords,)*
+                    _ => return None,
+                };
+                Some(kw)
+            }
+
+            pub fn from_char(c: char) -> Option<SyntaxKind> {
+                let tok = match c {
+                    #(#single_byte_tokens_values => #single_byte_tokens,)*
+                    _ => return None,
+                };
+                Some(tok)
+            }
+        }
+
+        #[macro_export]
+        macro_rules! T {
+            #([#punctuation_values] => { $crate::SyntaxKind::#punctuation };)*
+            #([#all_keywords_idents] => { $crate::SyntaxKind::#all_keywords };)*
+            [lifetime_ident] => { $crate::SyntaxKind::LIFETIME_IDENT };
+            [ident] => { $crate::SyntaxKind::IDENT };
+            [shebang] => { $crate::SyntaxKind::SHEBANG };
+        }
+    };
+
+    sourcegen::add_preamble("sourcegen_ast", sourcegen::reformat(ast.to_string()))
+}
+
+fn to_upper_snake_case(s: &str) -> String {
+    let mut buf = String::with_capacity(s.len());
+    let mut prev = false;
+    for c in s.chars() {
+        if c.is_ascii_uppercase() && prev {
+            buf.push('_')
+        }
+        prev = true;
+
+        buf.push(c.to_ascii_uppercase());
+    }
+    buf
+}
+
+fn to_lower_snake_case(s: &str) -> String {
+    let mut buf = String::with_capacity(s.len());
+    let mut prev = false;
+    for c in s.chars() {
+        if c.is_ascii_uppercase() && prev {
+            buf.push('_')
+        }
+        prev = true;
+
+        buf.push(c.to_ascii_lowercase());
+    }
+    buf
+}
+
+fn to_pascal_case(s: &str) -> String {
+    let mut buf = String::with_capacity(s.len());
+    let mut prev_is_underscore = true;
+    for c in s.chars() {
+        if c == '_' {
+            prev_is_underscore = true;
+        } else if prev_is_underscore {
+            buf.push(c.to_ascii_uppercase());
+            prev_is_underscore = false;
+        } else {
+            buf.push(c.to_ascii_lowercase());
+        }
+    }
+    buf
+}
+
+fn pluralize(s: &str) -> String {
+    format!("{}s", s)
+}
+
+impl Field {
+    fn is_many(&self) -> bool {
+        matches!(self, Field::Node { cardinality: Cardinality::Many, .. })
+    }
+    fn token_kind(&self) -> Option<proc_macro2::TokenStream> {
+        match self {
+            Field::Token(token) => {
+                let token: proc_macro2::TokenStream = token.parse().unwrap();
+                Some(quote! { T![#token] })
+            }
+            _ => None,
+        }
+    }
+    fn method_name(&self) -> proc_macro2::Ident {
+        match self {
+            Field::Token(name) => {
+                let name = match name.as_str() {
+                    ";" => "semicolon",
+                    "->" => "thin_arrow",
+                    "'{'" => "l_curly",
+                    "'}'" => "r_curly",
+                    "'('" => "l_paren",
+                    "')'" => "r_paren",
+                    "'['" => "l_brack",
+                    "']'" => "r_brack",
+                    "<" => "l_angle",
+                    ">" => "r_angle",
+                    "=" => "eq",
+                    "!" => "excl",
+                    "*" => "star",
+                    "&" => "amp",
+                    "_" => "underscore",
+                    "." => "dot",
+                    ".." => "dotdot",
+                    "..." => "dotdotdot",
+                    "..=" => "dotdoteq",
+                    "=>" => "fat_arrow",
+                    "@" => "at",
+                    ":" => "colon",
+                    "::" => "coloncolon",
+                    "#" => "pound",
+                    "?" => "question_mark",
+                    "," => "comma",
+                    "|" => "pipe",
+                    _ => name,
+                };
+                format_ident!("{}_token", name)
+            }
+            Field::Node { name, .. } => {
+                if name == "type" {
+                    format_ident!("ty")
+                } else {
+                    format_ident!("{}", name)
+                }
+            }
+        }
+    }
+    fn ty(&self) -> proc_macro2::Ident {
+        match self {
+            Field::Token(_) => format_ident!("SyntaxToken"),
+            Field::Node { ty, .. } => format_ident!("{}", ty),
+        }
+    }
+}
+
+fn lower(grammar: &Grammar) -> AstSrc {
+    let mut res = AstSrc::default();
+
+    res.tokens = "Whitespace Comment String ByteString IntNumber FloatNumber"
+        .split_ascii_whitespace()
+        .map(|it| it.to_string())
+        .collect::<Vec<_>>();
+
+    let nodes = grammar.iter().collect::<Vec<_>>();
+
+    for &node in &nodes {
+        let name = grammar[node].name.clone();
+        let rule = &grammar[node].rule;
+        match lower_enum(grammar, rule) {
+            Some(variants) => {
+                let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
+                res.enums.push(enum_src);
+            }
+            None => {
+                let mut fields = Vec::new();
+                lower_rule(&mut fields, grammar, None, rule);
+                res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
+            }
+        }
+    }
+
+    deduplicate_fields(&mut res);
+    extract_enums(&mut res);
+    extract_struct_traits(&mut res);
+    extract_enum_traits(&mut res);
+    res
+}
+
+fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
+    let alternatives = match rule {
+        Rule::Alt(it) => it,
+        _ => return None,
+    };
+    let mut variants = Vec::new();
+    for alternative in alternatives {
+        match alternative {
+            Rule::Node(it) => variants.push(grammar[*it].name.clone()),
+            Rule::Token(it) if grammar[*it].name == ";" => (),
+            _ => return None,
+        }
+    }
+    Some(variants)
+}
+
+fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, rule: &Rule) {
+    if lower_comma_list(acc, grammar, label, rule) {
+        return;
+    }
+
+    match rule {
+        Rule::Node(node) => {
+            let ty = grammar[*node].name.clone();
+            let name = label.cloned().unwrap_or_else(|| to_lower_snake_case(&ty));
+            let field = Field::Node { name, ty, cardinality: Cardinality::Optional };
+            acc.push(field);
+        }
+        Rule::Token(token) => {
+            assert!(label.is_none());
+            let mut name = grammar[*token].name.clone();
+            if name != "int_number" && name != "string" {
+                if "[]{}()".contains(&name) {
+                    name = format!("'{}'", name);
+                }
+                let field = Field::Token(name);
+                acc.push(field);
+            }
+        }
+        Rule::Rep(inner) => {
+            if let Rule::Node(node) = &**inner {
+                let ty = grammar[*node].name.clone();
+                let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
+                let field = Field::Node { name, ty, cardinality: Cardinality::Many };
+                acc.push(field);
+                return;
+            }
+            panic!("unhandled rule: {:?}", rule)
+        }
+        Rule::Labeled { label: l, rule } => {
+            assert!(label.is_none());
+            let manually_implemented = matches!(
+                l.as_str(),
+                "lhs"
+                    | "rhs"
+                    | "then_branch"
+                    | "else_branch"
+                    | "start"
+                    | "end"
+                    | "op"
+                    | "index"
+                    | "base"
+                    | "value"
+                    | "trait"
+                    | "self_ty"
+            );
+            if manually_implemented {
+                return;
+            }
+            lower_rule(acc, grammar, Some(l), rule);
+        }
+        Rule::Seq(rules) | Rule::Alt(rules) => {
+            for rule in rules {
+                lower_rule(acc, grammar, label, rule)
+            }
+        }
+        Rule::Opt(rule) => lower_rule(acc, grammar, label, rule),
+    }
+}
+
+// (T (',' T)* ','?)
+fn lower_comma_list(
+    acc: &mut Vec<Field>,
+    grammar: &Grammar,
+    label: Option<&String>,
+    rule: &Rule,
+) -> bool {
+    let rule = match rule {
+        Rule::Seq(it) => it,
+        _ => return false,
+    };
+    let (node, repeat, trailing_comma) = match rule.as_slice() {
+        [Rule::Node(node), Rule::Rep(repeat), Rule::Opt(trailing_comma)] => {
+            (node, repeat, trailing_comma)
+        }
+        _ => return false,
+    };
+    let repeat = match &**repeat {
+        Rule::Seq(it) => it,
+        _ => return false,
+    };
+    match repeat.as_slice() {
+        [comma, Rule::Node(n)] if comma == &**trailing_comma && n == node => (),
+        _ => return false,
+    }
+    let ty = grammar[*node].name.clone();
+    let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
+    let field = Field::Node { name, ty, cardinality: Cardinality::Many };
+    acc.push(field);
+    true
+}
+
+fn deduplicate_fields(ast: &mut AstSrc) {
+    for node in &mut ast.nodes {
+        let mut i = 0;
+        'outer: while i < node.fields.len() {
+            for j in 0..i {
+                let f1 = &node.fields[i];
+                let f2 = &node.fields[j];
+                if f1 == f2 {
+                    node.fields.remove(i);
+                    continue 'outer;
+                }
+            }
+            i += 1;
+        }
+    }
+}
+
+fn extract_enums(ast: &mut AstSrc) {
+    for node in &mut ast.nodes {
+        for enm in &ast.enums {
+            let mut to_remove = Vec::new();
+            for (i, field) in node.fields.iter().enumerate() {
+                let ty = field.ty().to_string();
+                if enm.variants.iter().any(|it| it == &ty) {
+                    to_remove.push(i);
+                }
+            }
+            if to_remove.len() == enm.variants.len() {
+                node.remove_field(to_remove);
+                let ty = enm.name.clone();
+                let name = to_lower_snake_case(&ty);
+                node.fields.push(Field::Node { name, ty, cardinality: Cardinality::Optional });
+            }
+        }
+    }
+}
+
+fn extract_struct_traits(ast: &mut AstSrc) {
+    let traits: &[(&str, &[&str])] = &[
+        ("AttrsOwner", &["attrs"]),
+        ("NameOwner", &["name"]),
+        ("VisibilityOwner", &["visibility"]),
+        ("GenericParamsOwner", &["generic_param_list", "where_clause"]),
+        ("TypeBoundsOwner", &["type_bound_list", "colon_token"]),
+        ("ModuleItemOwner", &["items"]),
+        ("LoopBodyOwner", &["label", "loop_body"]),
+        ("ArgListOwner", &["arg_list"]),
+    ];
+
+    for node in &mut ast.nodes {
+        for (name, methods) in traits {
+            extract_struct_trait(node, name, methods);
+        }
+    }
+}
+
+fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
+    let mut to_remove = Vec::new();
+    for (i, field) in node.fields.iter().enumerate() {
+        let method_name = field.method_name().to_string();
+        if methods.iter().any(|&it| it == method_name) {
+            to_remove.push(i);
+        }
+    }
+    if to_remove.len() == methods.len() {
+        node.traits.push(trait_name.to_string());
+        node.remove_field(to_remove);
+    }
+}
+
+fn extract_enum_traits(ast: &mut AstSrc) {
+    for enm in &mut ast.enums {
+        if enm.name == "Stmt" {
+            continue;
+        }
+        let nodes = &ast.nodes;
+        let mut variant_traits = enm
+            .variants
+            .iter()
+            .map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
+            .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
+
+        let mut enum_traits = match variant_traits.next() {
+            Some(it) => it,
+            None => continue,
+        };
+        for traits in variant_traits {
+            enum_traits = enum_traits.intersection(&traits).cloned().collect();
+        }
+        enm.traits = enum_traits.into_iter().collect();
+    }
+}
+
+impl AstNodeSrc {
+    fn remove_field(&mut self, to_remove: Vec<usize>) {
+        to_remove.into_iter().rev().for_each(|idx| {
+            self.fields.remove(idx);
+        });
+    }
+}
diff --git a/crates/syntax/src/tests/sourcegen_tests.rs b/crates/syntax/src/tests/sourcegen_tests.rs
new file mode 100644
index 00000000000..7f56807947f
--- /dev/null
+++ b/crates/syntax/src/tests/sourcegen_tests.rs
@@ -0,0 +1,124 @@
+//! This module greps parser's code for specially formatted comments and turns
+//! them into tests.
+
+use std::{
+    fs, iter,
+    path::{Path, PathBuf},
+};
+
+use rustc_hash::FxHashMap;
+
+#[test]
+fn sourcegen_parser_tests() {
+    let grammar_dir = sourcegen::project_root().join(Path::new("crates/parser/src/grammar"));
+    let tests = tests_from_dir(&grammar_dir);
+
+    install_tests(&tests.ok, "crates/syntax/test_data/parser/inline/ok");
+    install_tests(&tests.err, "crates/syntax/test_data/parser/inline/err");
+
+    fn install_tests(tests: &FxHashMap<String, Test>, into: &str) {
+        let tests_dir = sourcegen::project_root().join(into);
+        if !tests_dir.is_dir() {
+            fs::create_dir_all(&tests_dir).unwrap();
+        }
+        // ok is never actually read, but it needs to be specified to create a Test in existing_tests
+        let existing = existing_tests(&tests_dir, true);
+        for t in existing.keys().filter(|&t| !tests.contains_key(t)) {
+            panic!("Test is deleted: {}", t);
+        }
+
+        let mut new_idx = existing.len() + 1;
+        for (name, test) in tests {
+            let path = match existing.get(name) {
+                Some((path, _test)) => path.clone(),
+                None => {
+                    let file_name = format!("{:04}_{}.rs", new_idx, name);
+                    new_idx += 1;
+                    tests_dir.join(file_name)
+                }
+            };
+            sourcegen::ensure_file_contents(&path, &test.text);
+        }
+    }
+}
+
+#[derive(Debug)]
+struct Test {
+    name: String,
+    text: String,
+    ok: bool,
+}
+
+#[derive(Default, Debug)]
+struct Tests {
+    ok: FxHashMap<String, Test>,
+    err: FxHashMap<String, Test>,
+}
+
+fn collect_tests(s: &str) -> Vec<Test> {
+    let mut res = Vec::new();
+    for comment_block in sourcegen::CommentBlock::extract_untagged(s) {
+        let first_line = &comment_block.contents[0];
+        let (name, ok) = if let Some(name) = first_line.strip_prefix("test ") {
+            (name.to_string(), true)
+        } else if let Some(name) = first_line.strip_prefix("test_err ") {
+            (name.to_string(), false)
+        } else {
+            continue;
+        };
+        let text: String = comment_block.contents[1..]
+            .iter()
+            .cloned()
+            .chain(iter::once(String::new()))
+            .collect::<Vec<_>>()
+            .join("\n");
+        assert!(!text.trim().is_empty() && text.ends_with('\n'));
+        res.push(Test { name, text, ok })
+    }
+    res
+}
+
+fn tests_from_dir(dir: &Path) -> Tests {
+    let mut res = Tests::default();
+    for entry in sourcegen::list_rust_files(dir) {
+        process_file(&mut res, entry.as_path());
+    }
+    let grammar_rs = dir.parent().unwrap().join("grammar.rs");
+    process_file(&mut res, &grammar_rs);
+    return res;
+
+    fn process_file(res: &mut Tests, path: &Path) {
+        let text = fs::read_to_string(path).unwrap();
+
+        for test in collect_tests(&text) {
+            if test.ok {
+                if let Some(old_test) = res.ok.insert(test.name.clone(), test) {
+                    panic!("Duplicate test: {}", old_test.name);
+                }
+            } else if let Some(old_test) = res.err.insert(test.name.clone(), test) {
+                panic!("Duplicate test: {}", old_test.name);
+            }
+        }
+    }
+}
+
+fn existing_tests(dir: &Path, ok: bool) -> FxHashMap<String, (PathBuf, Test)> {
+    let mut res = FxHashMap::default();
+    for file in fs::read_dir(dir).unwrap() {
+        let file = file.unwrap();
+        let path = file.path();
+        if path.extension().unwrap_or_default() != "rs" {
+            continue;
+        }
+        let name = {
+            let file_name = path.file_name().unwrap().to_str().unwrap();
+            file_name[5..file_name.len() - 3].to_string()
+        };
+        let text = fs::read_to_string(&path).unwrap();
+        let test = Test { name: name.clone(), text, ok };
+        if let Some(old) = res.insert(name, (path, test)) {
+            println!("Duplicate test: {:?}", old);
+        }
+    }
+    res
+}
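For reference, the comment convention `sourcegen_parser_tests` consumes: a comment block in the parser's grammar code whose first line starts with `test ` (or `test_err `) names a test, and the remaining comment lines are its Rust source. A hypothetical example (the test name and snippet are made up for illustration):

```rust
// Somewhere in crates/parser/src/grammar/*.rs:
//
// test fn_item
// fn foo() {}
//
// `collect_tests` above turns this block into a Test named "fn_item" whose
// text is "fn foo() {}\n"; `install_tests` then writes it to
// crates/syntax/test_data/parser/inline/ok/<idx>_fn_item.rs (e.g.
// 0001_fn_item.rs), and `ensure_file_contents` keeps it in sync.
```

Because all three generators are plain tests, running the test suite (e.g. `cargo test -p syntax`) regenerates any stale files; no separate xtask codegen step is needed.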
