about summary refs log tree commit diff
path: root/compiler/rustc_fluent_macro
diff options
context:
space:
mode:
authorNilstrieb <48135649+Nilstrieb@users.noreply.github.com>2023-04-16 14:33:00 +0200
committerNilstrieb <48135649+Nilstrieb@users.noreply.github.com>2023-04-18 18:56:22 +0000
commitb5d3d970fa64c25eecfbd8ebbae601a2c6cb2cb3 (patch)
tree0e19df4016facff72f38ad739a7de6b5ef7667c5 /compiler/rustc_fluent_macro
parentde96f3d8735b70d5dc1ca178aaee198b329b8f3d (diff)
downloadrust-b5d3d970fa64c25eecfbd8ebbae601a2c6cb2cb3.tar.gz
rust-b5d3d970fa64c25eecfbd8ebbae601a2c6cb2cb3.zip
Add `rustc_fluent_macro` to decouple fluent from `rustc_macros`
Fluent, with all the icu4x it brings in, takes quite some time to
compile. `fluent_messages!` is only needed in further downstream rustc
crates, but is blocking more upstream crates like `rustc_index`. By
splitting it out, we allow `rustc_macros` to be compiled earlier, which
speeds up `x check compiler` by about 5 seconds (and even more after the
needless dependency on `serde_json` is removed from
`rustc_data_structures`).
Diffstat (limited to 'compiler/rustc_fluent_macro')
-rw-r--r--compiler/rustc_fluent_macro/Cargo.toml17
-rw-r--r--compiler/rustc_fluent_macro/src/fluent.rs336
-rw-r--r--compiler/rustc_fluent_macro/src/lib.rs64
3 files changed, 417 insertions, 0 deletions
diff --git a/compiler/rustc_fluent_macro/Cargo.toml b/compiler/rustc_fluent_macro/Cargo.toml
new file mode 100644
index 00000000000..a45559af713
--- /dev/null
+++ b/compiler/rustc_fluent_macro/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "rustc_fluent_macro"
+version = "0.1.0"
+edition = "2021"
+
+[lib]
+proc-macro = true
+
+[dependencies]
+annotate-snippets = "0.9"
+fluent-bundle = "0.15.2"
+fluent-syntax = "0.11"
+synstructure = "0.13.0"
+syn = { version = "2", features = ["full"] }
+proc-macro2 = "1"
+quote = "1"
+unic-langid = { version = "0.9.0", features = ["macros"] }
diff --git a/compiler/rustc_fluent_macro/src/fluent.rs b/compiler/rustc_fluent_macro/src/fluent.rs
new file mode 100644
index 00000000000..9dffc9a7645
--- /dev/null
+++ b/compiler/rustc_fluent_macro/src/fluent.rs
@@ -0,0 +1,336 @@
+use annotate_snippets::{
+    display_list::DisplayList,
+    snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation},
+};
+use fluent_bundle::{FluentBundle, FluentError, FluentResource};
+use fluent_syntax::{
+    ast::{
+        Attribute, Entry, Expression, Identifier, InlineExpression, Message, Pattern,
+        PatternElement,
+    },
+    parser::ParserError,
+};
+use proc_macro::{Diagnostic, Level, Span};
+use proc_macro2::TokenStream;
+use quote::quote;
+use std::{
+    collections::{HashMap, HashSet},
+    fs::read_to_string,
+    path::{Path, PathBuf},
+};
+use syn::{parse_macro_input, Ident, LitStr};
+use unic_langid::langid;
+
+/// Helper function for returning an absolute path for macro-invocation relative file paths.
+///
+/// If the input is already absolute, then the input is returned. If the input is not absolute,
+/// then it is appended to the directory containing the source file with this macro invocation.
+fn invocation_relative_path_to_absolute(span: Span, path: &str) -> PathBuf {
+    let path = Path::new(path);
+    if path.is_absolute() {
+        path.to_path_buf()
+    } else {
+        // `/a/b/c/foo/bar.rs` contains the current macro invocation
+        let mut source_file_path = span.source_file().path();
+        // `/a/b/c/foo/`
+        source_file_path.pop();
+        // `/a/b/c/foo/../locales/en-US/example.ftl`
+        source_file_path.push(path);
+        source_file_path
+    }
+}
+
+/// Tokens to be returned when the macro cannot proceed.
+fn failed(crate_name: &Ident) -> proc_macro::TokenStream {
+    quote! {
+        pub static DEFAULT_LOCALE_RESOURCE: &'static str = "";
+
+        #[allow(non_upper_case_globals)]
+        #[doc(hidden)]
+        pub(crate) mod fluent_generated {
+            pub mod #crate_name {
+            }
+
+            pub mod _subdiag {
+                pub const help: crate::SubdiagnosticMessage =
+                    crate::SubdiagnosticMessage::FluentAttr(std::borrow::Cow::Borrowed("help"));
+                pub const note: crate::SubdiagnosticMessage =
+                    crate::SubdiagnosticMessage::FluentAttr(std::borrow::Cow::Borrowed("note"));
+                pub const warn: crate::SubdiagnosticMessage =
+                    crate::SubdiagnosticMessage::FluentAttr(std::borrow::Cow::Borrowed("warn"));
+                pub const label: crate::SubdiagnosticMessage =
+                    crate::SubdiagnosticMessage::FluentAttr(std::borrow::Cow::Borrowed("label"));
+                pub const suggestion: crate::SubdiagnosticMessage =
+                    crate::SubdiagnosticMessage::FluentAttr(std::borrow::Cow::Borrowed("suggestion"));
+            }
+        }
+    }
+    .into()
+}
+
+/// See [rustc_fluent_macro::fluent_messages].
+pub(crate) fn fluent_messages(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
+    let crate_name = std::env::var("CARGO_PKG_NAME")
+        // If `CARGO_PKG_NAME` is missing, then we're probably running in a test, so use
+        // `no_crate`.
+        .unwrap_or_else(|_| "no_crate".to_string())
+        .replace("rustc_", "");
+
+    // Cannot iterate over individual messages in a bundle, so do that using the
+    // `FluentResource` instead. Construct a bundle anyway to find out if there are conflicting
+    // messages in the resources.
+    let mut bundle = FluentBundle::new(vec![langid!("en-US")]);
+
+    // Set of Fluent attribute names already output, to avoid duplicate type errors - any given
+    // constant created for a given attribute is the same.
+    let mut previous_attrs = HashSet::new();
+
+    let resource_str = parse_macro_input!(input as LitStr);
+    let resource_span = resource_str.span().unwrap();
+    let relative_ftl_path = resource_str.value();
+    let absolute_ftl_path = invocation_relative_path_to_absolute(resource_span, &relative_ftl_path);
+
+    let crate_name = Ident::new(&crate_name, resource_str.span());
+
+    // As this macro also outputs an `include_str!` for this file, the macro will always be
+    // re-executed when the file changes.
+    let resource_contents = match read_to_string(absolute_ftl_path) {
+        Ok(resource_contents) => resource_contents,
+        Err(e) => {
+            Diagnostic::spanned(
+                resource_span,
+                Level::Error,
+                format!("could not open Fluent resource: {e}"),
+            )
+            .emit();
+            return failed(&crate_name);
+        }
+    };
+    let mut bad = false;
+    for esc in ["\\n", "\\\"", "\\'"] {
+        for _ in resource_contents.matches(esc) {
+            bad = true;
+            Diagnostic::spanned(resource_span, Level::Error, format!("invalid escape `{esc}` in Fluent resource"))
+                .note("Fluent does not interpret these escape sequences (<https://projectfluent.org/fluent/guide/special.html>)")
+                .emit();
+        }
+    }
+    if bad {
+        return failed(&crate_name);
+    }
+
+    let resource = match FluentResource::try_new(resource_contents) {
+        Ok(resource) => resource,
+        Err((this, errs)) => {
+            Diagnostic::spanned(resource_span, Level::Error, "could not parse Fluent resource")
+                .help("see additional errors emitted")
+                .emit();
+            for ParserError { pos, slice: _, kind } in errs {
+                let mut err = kind.to_string();
+                // Entirely unnecessary string modification so that the error message starts
+                // with a lowercase as rustc errors do.
+                err.replace_range(0..1, &err.chars().next().unwrap().to_lowercase().to_string());
+
+                let line_starts: Vec<usize> = std::iter::once(0)
+                    .chain(
+                        this.source()
+                            .char_indices()
+                            .filter_map(|(i, c)| Some(i + 1).filter(|_| c == '\n')),
+                    )
+                    .collect();
+                let line_start = line_starts
+                    .iter()
+                    .enumerate()
+                    .map(|(line, idx)| (line + 1, idx))
+                    .filter(|(_, idx)| **idx <= pos.start)
+                    .last()
+                    .unwrap()
+                    .0;
+
+                let snippet = Snippet {
+                    title: Some(Annotation {
+                        label: Some(&err),
+                        id: None,
+                        annotation_type: AnnotationType::Error,
+                    }),
+                    footer: vec![],
+                    slices: vec![Slice {
+                        source: this.source(),
+                        line_start,
+                        origin: Some(&relative_ftl_path),
+                        fold: true,
+                        annotations: vec![SourceAnnotation {
+                            label: "",
+                            annotation_type: AnnotationType::Error,
+                            range: (pos.start, pos.end - 1),
+                        }],
+                    }],
+                    opt: Default::default(),
+                };
+                let dl = DisplayList::from(snippet);
+                eprintln!("{dl}\n");
+            }
+
+            return failed(&crate_name);
+        }
+    };
+
+    let mut constants = TokenStream::new();
+    let mut previous_defns = HashMap::new();
+    let mut message_refs = Vec::new();
+    for entry in resource.entries() {
+        if let Entry::Message(Message { id: Identifier { name }, attributes, value, .. }) = entry {
+            let _ = previous_defns.entry(name.to_string()).or_insert(resource_span);
+            if name.contains('-') {
+                Diagnostic::spanned(
+                    resource_span,
+                    Level::Error,
+                    format!("name `{name}` contains a '-' character"),
+                )
+                .help("replace any '-'s with '_'s")
+                .emit();
+            }
+
+            if let Some(Pattern { elements }) = value {
+                for elt in elements {
+                    if let PatternElement::Placeable {
+                        expression:
+                            Expression::Inline(InlineExpression::MessageReference { id, .. }),
+                    } = elt
+                    {
+                        message_refs.push((id.name, *name));
+                    }
+                }
+            }
+
+            // `typeck_foo_bar` => `foo_bar` (in `typeck.ftl`)
+            // `const_eval_baz` => `baz` (in `const_eval.ftl`)
+            // `const-eval-hyphen-having` => `hyphen_having` (in `const_eval.ftl`)
+            // The last case we error about above, but we want to fall back gracefully
+            // so that only the error is being emitted and not also one about the macro
+            // failing.
+            let crate_prefix = format!("{crate_name}_");
+
+            let snake_name = name.replace('-', "_");
+            if !snake_name.starts_with(&crate_prefix) {
+                Diagnostic::spanned(
+                    resource_span,
+                    Level::Error,
+                    format!("name `{name}` does not start with the crate name"),
+                )
+                .help(format!(
+                    "prepend `{crate_prefix}` to the slug name: `{crate_prefix}{snake_name}`"
+                ))
+                .emit();
+            };
+            let snake_name = Ident::new(&snake_name, resource_str.span());
+
+            if !previous_attrs.insert(snake_name.clone()) {
+                continue;
+            }
+
+            let msg = format!("Constant referring to Fluent message `{name}` from `{crate_name}`");
+            constants.extend(quote! {
+                #[doc = #msg]
+                pub const #snake_name: crate::DiagnosticMessage =
+                    crate::DiagnosticMessage::FluentIdentifier(
+                        std::borrow::Cow::Borrowed(#name),
+                        None
+                    );
+            });
+
+            for Attribute { id: Identifier { name: attr_name }, .. } in attributes {
+                let snake_name = Ident::new(
+                    &format!("{}{}", &crate_prefix, &attr_name.replace('-', "_")),
+                    resource_str.span(),
+                );
+                if !previous_attrs.insert(snake_name.clone()) {
+                    continue;
+                }
+
+                if attr_name.contains('-') {
+                    Diagnostic::spanned(
+                        resource_span,
+                        Level::Error,
+                        format!("attribute `{attr_name}` contains a '-' character"),
+                    )
+                    .help("replace any '-'s with '_'s")
+                    .emit();
+                }
+
+                let msg = format!(
+                    "Constant referring to Fluent message `{name}.{attr_name}` from `{crate_name}`"
+                );
+                constants.extend(quote! {
+                    #[doc = #msg]
+                    pub const #snake_name: crate::SubdiagnosticMessage =
+                        crate::SubdiagnosticMessage::FluentAttr(
+                            std::borrow::Cow::Borrowed(#attr_name)
+                        );
+                });
+            }
+        }
+    }
+
+    for (mref, name) in message_refs.into_iter() {
+        if !previous_defns.contains_key(mref) {
+            Diagnostic::spanned(
+                resource_span,
+                Level::Error,
+                format!("referenced message `{mref}` does not exist (in message `{name}`)"),
+            )
+            .help(&format!("you may have meant to use a variable reference (`{{${mref}}}`)"))
+            .emit();
+        }
+    }
+
+    if let Err(errs) = bundle.add_resource(resource) {
+        for e in errs {
+            match e {
+                FluentError::Overriding { kind, id } => {
+                    Diagnostic::spanned(
+                        resource_span,
+                        Level::Error,
+                        format!("overrides existing {kind}: `{id}`"),
+                    )
+                    .emit();
+                }
+                FluentError::ResolverError(_) | FluentError::ParserError(_) => unreachable!(),
+            }
+        }
+    }
+
+    quote! {
+        /// Raw content of Fluent resource for this crate, generated by `fluent_messages` macro,
+        /// imported by `rustc_driver` to include all crates' resources in one bundle.
+        pub static DEFAULT_LOCALE_RESOURCE: &'static str = include_str!(#relative_ftl_path);
+
+        #[allow(non_upper_case_globals)]
+        #[doc(hidden)]
+        /// Auto-generated constants for type-checked references to Fluent messages.
+        pub(crate) mod fluent_generated {
+            #constants
+
+            /// Constants expected to exist by the diagnostic derive macros to use as default Fluent
+            /// identifiers for different subdiagnostic kinds.
+            pub mod _subdiag {
+                /// Default for `#[help]`
+                pub const help: crate::SubdiagnosticMessage =
+                    crate::SubdiagnosticMessage::FluentAttr(std::borrow::Cow::Borrowed("help"));
+                /// Default for `#[note]`
+                pub const note: crate::SubdiagnosticMessage =
+                    crate::SubdiagnosticMessage::FluentAttr(std::borrow::Cow::Borrowed("note"));
+                /// Default for `#[warn]`
+                pub const warn: crate::SubdiagnosticMessage =
+                    crate::SubdiagnosticMessage::FluentAttr(std::borrow::Cow::Borrowed("warn"));
+                /// Default for `#[label]`
+                pub const label: crate::SubdiagnosticMessage =
+                    crate::SubdiagnosticMessage::FluentAttr(std::borrow::Cow::Borrowed("label"));
+                /// Default for `#[suggestion]`
+                pub const suggestion: crate::SubdiagnosticMessage =
+                    crate::SubdiagnosticMessage::FluentAttr(std::borrow::Cow::Borrowed("suggestion"));
+            }
+        }
+    }
+    .into()
+}
diff --git a/compiler/rustc_fluent_macro/src/lib.rs b/compiler/rustc_fluent_macro/src/lib.rs
new file mode 100644
index 00000000000..a01643cd67d
--- /dev/null
+++ b/compiler/rustc_fluent_macro/src/lib.rs
@@ -0,0 +1,64 @@
+#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
+#![feature(proc_macro_diagnostic)]
+#![feature(proc_macro_span)]
+#![deny(rustc::untranslatable_diagnostic)]
+#![deny(rustc::diagnostic_outside_of_impl)]
+#![allow(rustc::default_hash_types)]
+
+use proc_macro::TokenStream;
+
+mod fluent;
+
+/// Implements the `fluent_messages` macro, which performs compile-time validation of the
+/// compiler's Fluent resources (i.e. that the resources parse and don't multiply define the same
+/// messages) and generates constants that make using those messages in diagnostics more ergonomic.
+///
+/// For example, given the following invocation of the macro..
+///
+/// ```ignore (rust)
+/// fluent_messages! { "./typeck.ftl" }
+/// ```
+/// ..where `typeck.ftl` has the following contents..
+///
+/// ```fluent
+/// typeck_field_multiply_specified_in_initializer =
+///     field `{$ident}` specified more than once
+///     .label = used more than once
+///     .label_previous_use = first use of `{$ident}`
+/// ```
+/// ...then the macro parse the Fluent resource, emitting a diagnostic if it fails to do so, and
+/// will generate the following code:
+///
+/// ```ignore (rust)
+/// pub static DEFAULT_LOCALE_RESOURCE: &'static [&'static str] = include_str!("./typeck.ftl");
+///
+/// mod fluent_generated {
+///     mod typeck {
+///         pub const field_multiply_specified_in_initializer: DiagnosticMessage =
+///             DiagnosticMessage::fluent("typeck_field_multiply_specified_in_initializer");
+///         pub const field_multiply_specified_in_initializer_label_previous_use: DiagnosticMessage =
+///             DiagnosticMessage::fluent_attr(
+///                 "typeck_field_multiply_specified_in_initializer",
+///                 "previous_use_label"
+///             );
+///     }
+/// }
+/// ```
+/// When emitting a diagnostic, the generated constants can be used as follows:
+///
+/// ```ignore (rust)
+/// let mut err = sess.struct_span_err(
+///     span,
+///     fluent::typeck::field_multiply_specified_in_initializer
+/// );
+/// err.span_default_label(span);
+/// err.span_label(
+///     previous_use_span,
+///     fluent::typeck::field_multiply_specified_in_initializer_label_previous_use
+/// );
+/// err.emit();
+/// ```
+#[proc_macro]
+pub fn fluent_messages(input: TokenStream) -> TokenStream {
+    fluent::fluent_messages(input)
+}