about summary refs log tree commit diff
path: root/compiler/rustc_errors
diff options
context:
space:
mode:
author: Esteban Küber <esteban@kuber.com.ar>, 2021-08-19 11:40:00 -0700
committer: Pietro Albini <pietro@pietroalbini.org>, 2021-10-31 13:14:04 +0100
commit: c0b134582a949062f9f3a8ba3def88b98a98df6a (patch)
tree: bc303a0c9cac3405bea3973e3ce075759b88c136 /compiler/rustc_errors
parent: 38b01d90657a355abf81b53cb3ee0b9a7dd88f98 (diff)
download: rust-c0b134582a949062f9f3a8ba3def88b98a98df6a.tar.gz
rust-c0b134582a949062f9f3a8ba3def88b98a98df6a.zip
Lint against RTL unicode codepoints in literals and comments
Address CVE-2021-42574.
Diffstat (limited to 'compiler/rustc_errors')
-rw-r--r-- compiler/rustc_errors/src/emitter.rs 20
1 file changed, 19 insertions, 1 deletion
diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs
index e17604740f0..e16ff974122 100644
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@@ -2063,8 +2063,26 @@ fn num_decimal_digits(num: usize) -> usize {
     MAX_DIGITS
 }
 
+// We replace some characters so the CLI output is always consistent and underlines aligned.
+const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
+    ('\t', "    "),   // We do our own tab replacement
+    ('\u{202A}', ""), // The following unicode text flow control characters are inconsistently
+    ('\u{202B}', ""), // supported across CLIs and can cause confusion due to the bytes on disk
+    ('\u{202D}', ""), // not corresponding to the visible source code, so we replace them always.
+    ('\u{202E}', ""),
+    ('\u{2066}', ""),
+    ('\u{2067}', ""),
+    ('\u{2068}', ""),
+    ('\u{202C}', ""),
+    ('\u{2069}', ""),
+];
+
 fn replace_tabs(str: &str) -> String {
-    str.replace('\t', "    ")
+    let mut s = str.to_string();
+    for (c, replacement) in OUTPUT_REPLACEMENTS {
+        s = s.replace(*c, replacement);
+    }
+    s
 }
 
 fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {