Auto merge of #88781 - estebank:emoji-idents, r=oli-obk

Tokenize emoji as if they were valid identifiers. In the lexer, consider emojis to be valid identifiers and reject them later to avoid knock-down parse errors. Partially addresses #86102.
author: bors <bors@rust-lang.org> 2021-11-25 08:16:08 +0000
committer: bors <bors@rust-lang.org> 2021-11-25 08:16:08 +0000
commit: 23a436606b118bd2fbb12f64fce21e7f9d355349 (patch)
tree: f7cdcfcb705dc416fd967deb4e89ad3184f282c4 /compiler/rustc_errors
parent: c6eda7d8a7af3ef51311d3106874a7d8de994edc (diff)
parent: d92916439c372967e4c12b7ece3c8d7e860a8777 (diff)
download: rust-23a436606b118bd2fbb12f64fce21e7f9d355349.tar.gz
rust-23a436606b118bd2fbb12f64fce21e7f9d355349.zip
1 files changed, 7 insertions, 6 deletions
diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs
index 6b79962ddd6..d64a589bd9b 100644
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@@ -730,7 +730,7 @@ impl EmitterWriter {
         }
 
         let source_string = match file.get_line(line.line_index - 1) {
-            Some(s) => replace_tabs(&*s),
+            Some(s) => normalize_whitespace(&*s),
             None => return Vec::new(),
         };
 
@@ -1286,7 +1286,7 @@ impl EmitterWriter {
             }
             for &(ref text, _) in msg.iter() {
                 // Account for newlines to align output to its label.
-                for (line, text) in replace_tabs(text).lines().enumerate() {
+                for (line, text) in normalize_whitespace(text).lines().enumerate() {
                     buffer.append(
                         0 + line,
                         &format!(
@@ -1550,7 +1550,7 @@ impl EmitterWriter {
 
                             self.draw_line(
                                 &mut buffer,
-                                &replace_tabs(&unannotated_line),
+                                &normalize_whitespace(&unannotated_line),
                                 annotated_file.lines[line_idx + 1].line_index - 1,
                                 last_buffer_line_num,
                                 width_offset,
@@ -1672,7 +1672,7 @@ impl EmitterWriter {
                     buffer.puts(
                         row_num - 1,
                         max_line_num_len + 3,
-                        &replace_tabs(
+                        &normalize_whitespace(
                             &*file_lines
                                 .file
                                 .get_line(file_lines.lines[line_pos].line_index)
@@ -1698,7 +1698,7 @@ impl EmitterWriter {
                 }
 
                 // print the suggestion
-                buffer.append(row_num, &replace_tabs(line), Style::NoStyle);
+                buffer.append(row_num, &normalize_whitespace(line), Style::NoStyle);
 
                 // Colorize addition/replacements with green.
                 for &SubstitutionHighlight { start, end } in highlight_parts {
@@ -2081,6 +2081,7 @@ fn num_decimal_digits(num: usize) -> usize {
 // We replace some characters so the CLI output is always consistent and underlines aligned.
 const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
     ('\t', "    "),   // We do our own tab replacement
+    ('\u{200D}', ""), // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
     ('\u{202A}', ""), // The following unicode text flow control characters are inconsistently
     ('\u{202B}', ""), // supported accross CLIs and can cause confusion due to the bytes on disk
     ('\u{202D}', ""), // not corresponding to the visible source code, so we replace them always.
@@ -2092,7 +2093,7 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
     ('\u{2069}', ""),
 ];
 
-fn replace_tabs(str: &str) -> String {
+fn normalize_whitespace(str: &str) -> String {
     let mut s = str.to_string();
     for (c, replacement) in OUTPUT_REPLACEMENTS {
         s = s.replace(*c, replacement);
author	bors <bors@rust-lang.org>	2021-11-25 08:16:08 +0000
committer	bors <bors@rust-lang.org>	2021-11-25 08:16:08 +0000
commit	23a436606b118bd2fbb12f64fce21e7f9d355349 (patch)
tree	f7cdcfcb705dc416fd967deb4e89ad3184f282c4 /compiler/rustc_errors
parent	c6eda7d8a7af3ef51311d3106874a7d8de994edc (diff)
parent	d92916439c372967e4c12b7ece3c8d7e860a8777 (diff)
download	rust-23a436606b118bd2fbb12f64fce21e7f9d355349.tar.gz rust-23a436606b118bd2fbb12f64fce21e7f9d355349.zip