about summary refs log tree commit diff
diff options
context:
space:
mode:
author Anton Golov <jesyspa@gmail.com> 2021-07-30 16:09:33 +0200
committer Anton Golov <jesyspa@gmail.com> 2021-07-30 16:26:39 +0200
commit 5d59b4412e71298e6e44b55afbfaa9dd86aee590 (patch)
tree 61861a3d3fa2d1f48f61b69793bf817e8e200364
parent 1195bea5a7b73e079fa14b37ac7e375fc77d368a (diff)
download rust-5d59b4412e71298e6e44b55afbfaa9dd86aee590.tar.gz
rust-5d59b4412e71298e6e44b55afbfaa9dd86aee590.zip
Add warning when whitespace is not skipped after an escaped newline.
-rw-r--r-- compiler/rustc_ast/src/util/literal.rs 24
-rw-r--r-- compiler/rustc_lexer/src/unescape.rs 34
-rw-r--r-- compiler/rustc_lexer/src/unescape/tests.rs 19
-rw-r--r-- compiler/rustc_parse/src/lexer/unescape_error_reporting.rs 6
4 files changed, 75 insertions, 8 deletions
diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
index 2124f1efb99..9c6ad47427d 100644
--- a/compiler/rustc_ast/src/util/literal.rs
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -63,7 +63,11 @@ impl LitKind {
                         unescape_literal(&s, Mode::Str, &mut |_, unescaped_char| {
                             match unescaped_char {
                                 Ok(c) => buf.push(c),
-                                Err(_) => error = Err(LitError::LexerError),
+                                Err(err) => {
+                                    if err.is_fatal() {
+                                        error = Err(LitError::LexerError);
+                                    }
+                                }
                             }
                         });
                         error?;
@@ -83,7 +87,11 @@ impl LitKind {
                         unescape_literal(&s, Mode::RawStr, &mut |_, unescaped_char| {
                             match unescaped_char {
                                 Ok(c) => buf.push(c),
-                                Err(_) => error = Err(LitError::LexerError),
+                                Err(err) => {
+                                    if err.is_fatal() {
+                                        error = Err(LitError::LexerError);
+                                    }
+                                }
                             }
                         });
                         error?;
@@ -100,7 +108,11 @@ impl LitKind {
                 unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| {
                     match unescaped_byte {
                         Ok(c) => buf.push(c),
-                        Err(_) => error = Err(LitError::LexerError),
+                        Err(err) => {
+                            if err.is_fatal() {
+                                error = Err(LitError::LexerError);
+                            }
+                        }
                     }
                 });
                 error?;
@@ -114,7 +126,11 @@ impl LitKind {
                     unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| {
                         match unescaped_byte {
                             Ok(c) => buf.push(c),
-                            Err(_) => error = Err(LitError::LexerError),
+                            Err(err) => {
+                                if err.is_fatal() {
+                                    error = Err(LitError::LexerError);
+                                }
+                            }
                         }
                     });
                     error?;
diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs
index b4dd0fc2449..9a96c03cd3c 100644
--- a/compiler/rustc_lexer/src/unescape.rs
+++ b/compiler/rustc_lexer/src/unescape.rs
@@ -7,7 +7,7 @@ use std::str::Chars;
 #[cfg(test)]
 mod tests;
 
-/// Errors that can occur during string unescaping.
+/// Errors and warnings that can occur during string unescaping.
 #[derive(Debug, PartialEq, Eq)]
 pub enum EscapeError {
     /// Expected 1 char, but 0 were found.
@@ -56,6 +56,20 @@ pub enum EscapeError {
     NonAsciiCharInByte,
     /// Non-ascii character in byte string literal.
     NonAsciiCharInByteString,
+
+    /// After a line ending with '\', the next line contains whitespace
+    /// characters that are not skipped.
+    UnskippedWhitespaceWarning,
+}
+
+impl EscapeError {
+    /// Returns true for actual errors, as opposed to warnings.
+    pub fn is_fatal(&self) -> bool {
+        match self {
+            EscapeError::UnskippedWhitespaceWarning => false,
+            _ => true,
+        }
+    }
 }
 
 /// Takes a contents of a literal (without quotes) and produces a
@@ -283,7 +297,7 @@ where
                         // if unescaped '\' character is followed by '\n'.
                         // For details see [Rust language reference]
                         // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
-                        skip_ascii_whitespace(&mut chars);
+                        skip_ascii_whitespace(&mut chars, start, callback);
                         continue;
                     }
                     _ => scan_escape(first_char, &mut chars, mode),
@@ -297,13 +311,25 @@ where
         callback(start..end, unescaped_char);
     }
 
-    fn skip_ascii_whitespace(chars: &mut Chars<'_>) {
+    fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
+    where
+        F: FnMut(Range<usize>, Result<char, EscapeError>),
+    {
         let str = chars.as_str();
         let first_non_space = str
             .bytes()
             .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
             .unwrap_or(str.len());
-        *chars = str[first_non_space..].chars()
+        let tail = &str[first_non_space..];
+        if let Some(c) = tail.chars().nth(0) {
+            // For error reporting, we would like the span to contain the character that was not
+            // skipped.  The +1 is necessary to account for the leading \ that started the escape.
+            let end = start + first_non_space + c.len_utf8() + 1;
+            if c.is_whitespace() {
+                callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning));
+            }
+        }
+        *chars = tail.chars();
     }
 }
 
diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs
index f2b751a78f2..1f4dbb20f4e 100644
--- a/compiler/rustc_lexer/src/unescape/tests.rs
+++ b/compiler/rustc_lexer/src/unescape/tests.rs
@@ -99,6 +99,25 @@ fn test_unescape_char_good() {
 }
 
 #[test]
+fn test_unescape_str_warn() {
+    fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
+        let mut unescaped = Vec::with_capacity(literal.len());
+        unescape_literal(literal, Mode::Str, &mut |range, res| unescaped.push((range, res)));
+        assert_eq!(unescaped, expected);
+    }
+
+    check(
+        "\\\n \u{a0} x",
+        &[
+            (0..5, Err(EscapeError::UnskippedWhitespaceWarning)),
+            (3..5, Ok('\u{a0}')),
+            (5..6, Ok(' ')),
+            (6..7, Ok('x')),
+        ],
+    );
+}
+
+#[test]
 fn test_unescape_str_good() {
     fn check(literal_text: &str, expected: &str) {
         let mut buf = Ok(String::with_capacity(literal_text.len()));
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index a580f0c55d0..1c5be61130b 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -253,6 +253,12 @@ pub(crate) fn emit_unescape_error(
             let msg = "invalid trailing slash in literal";
             handler.struct_span_err(span, msg).span_label(span, msg).emit();
         }
+        EscapeError::UnskippedWhitespaceWarning => {
+            let (c, char_span) = last_char();
+            let msg =
+                format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode());
+            handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit();
+        }
     }
 }