about summary refs log tree commit diff
diff options
context:
space:
mode:
authoroxalica <oxalicc@pm.me>2023-07-17 22:28:57 +0800
committeroxalica <oxalicc@pm.me>2023-07-17 22:52:58 +0800
commitde1f766820d58bd87a94e9f055fbf269a3946e1f (patch)
treedfdd28066ba7b77df7b7481420a6c4e429aba715
parent37f84c101bca43b11027f30ab0c2852f9325bc3d (diff)
downloadrust-de1f766820d58bd87a94e9f055fbf269a3946e1f.tar.gz
rust-de1f766820d58bd87a94e9f055fbf269a3946e1f.zip
Fix highlighting of byte escape sequences
Currently non-UTF8 escape sequences in byte strings and any escape
sequences in byte literals are ignored.
-rw-r--r--crates/ide/src/syntax_highlighting.rs10
-rw-r--r--crates/ide/src/syntax_highlighting/escape.rs22
-rw-r--r--crates/ide/src/syntax_highlighting/test_data/highlight_strings.html6
-rw-r--r--crates/ide/src/syntax_highlighting/tests.rs6
-rw-r--r--crates/syntax/src/ast/token_ext.rs14
5 files changed, 48 insertions, 10 deletions
diff --git a/crates/ide/src/syntax_highlighting.rs b/crates/ide/src/syntax_highlighting.rs
index 577bd2bc1f8..ae97236409e 100644
--- a/crates/ide/src/syntax_highlighting.rs
+++ b/crates/ide/src/syntax_highlighting.rs
@@ -24,7 +24,7 @@ use syntax::{
 
 use crate::{
     syntax_highlighting::{
-        escape::{highlight_escape_char, highlight_escape_string},
+        escape::{highlight_escape_byte, highlight_escape_char, highlight_escape_string},
         format::highlight_format_string,
         highlights::Highlights,
         macro_::MacroHighlighter,
@@ -471,6 +471,14 @@ fn traverse(
                 };
 
                 highlight_escape_char(hl, &char, range.start())
+            } else if ast::Byte::can_cast(token.kind())
+                && ast::Byte::can_cast(descended_token.kind())
+            {
+                let Some(byte) = ast::Byte::cast(token) else {
+                    continue;
+                };
+
+                highlight_escape_byte(hl, &byte, range.start())
             }
         }
 
diff --git a/crates/ide/src/syntax_highlighting/escape.rs b/crates/ide/src/syntax_highlighting/escape.rs
index 211e3588095..2c63a69bdcb 100644
--- a/crates/ide/src/syntax_highlighting/escape.rs
+++ b/crates/ide/src/syntax_highlighting/escape.rs
@@ -1,7 +1,7 @@
 //! Syntax highlighting for escape sequences
 use crate::syntax_highlighting::highlights::Highlights;
 use crate::{HlRange, HlTag};
-use syntax::ast::{Char, IsString};
+use syntax::ast::{Byte, Char, IsString};
 use syntax::{AstToken, TextRange, TextSize};
 
 pub(super) fn highlight_escape_string<T: IsString>(
@@ -43,3 +43,23 @@ pub(super) fn highlight_escape_char(stack: &mut Highlights, char: &Char, start:
         TextRange::new(start + TextSize::from(1), start + TextSize::from(text.len() as u32 + 1));
     stack.add(HlRange { range, highlight: HlTag::EscapeSequence.into(), binding_hash: None })
 }
+
+pub(super) fn highlight_escape_byte(stack: &mut Highlights, byte: &Byte, start: TextSize) {
+    if byte.value().is_none() {
+        return;
+    }
+
+    let text = byte.text();
+    if !text.starts_with("b'") || !text.ends_with('\'') {
+        return;
+    }
+
+    let text = &text[2..text.len() - 1];
+    if !text.starts_with('\\') {
+        return;
+    }
+
+    let range =
+        TextRange::new(start + TextSize::from(2), start + TextSize::from(text.len() as u32 + 2));
+    stack.add(HlRange { range, highlight: HlTag::EscapeSequence.into(), binding_hash: None })
+}
diff --git a/crates/ide/src/syntax_highlighting/test_data/highlight_strings.html b/crates/ide/src/syntax_highlighting/test_data/highlight_strings.html
index f4f164aa1de..061329d2397 100644
--- a/crates/ide/src/syntax_highlighting/test_data/highlight_strings.html
+++ b/crates/ide/src/syntax_highlighting/test_data/highlight_strings.html
@@ -105,6 +105,8 @@ pre                 { color: #DCDCCC; background: #3F3F3F; font-size: 22px; padd
     <span class="keyword">let</span> <span class="variable declaration">a</span> <span class="operator">=</span> <span class="char_literal">'</span><span class="escape_sequence">\x65</span><span class="char_literal">'</span><span class="semicolon">;</span>
     <span class="keyword">let</span> <span class="variable declaration">a</span> <span class="operator">=</span> <span class="char_literal">'</span><span class="escape_sequence">\x00</span><span class="char_literal">'</span><span class="semicolon">;</span>
 
+    <span class="keyword">let</span> <span class="variable declaration">a</span> <span class="operator">=</span> <span class="byte_literal">b'</span><span class="escape_sequence">\xFF</span><span class="byte_literal">'</span><span class="semicolon">;</span>
+
     <span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"Hello </span><span class="escape_sequence">{{</span><span class="string_literal macro">Hello</span><span class="escape_sequence">}}</span><span class="string_literal macro">"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
     <span class="comment">// from https://doc.rust-lang.org/std/fmt/index.html</span>
     <span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"Hello"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>                 <span class="comment">// =&gt; "Hello"</span>
@@ -159,8 +161,8 @@ pre                 { color: #DCDCCC; background: #3F3F3F; font-size: 22px; padd
     <span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"Hello</span><span class="escape_sequence">\n</span><span class="string_literal macro">World"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
     <span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"</span><span class="escape_sequence">\u{48}</span><span class="escape_sequence">\x65</span><span class="escape_sequence">\x6C</span><span class="escape_sequence">\x6C</span><span class="escape_sequence">\x6F</span><span class="string_literal macro"> World"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
 
-    <span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span>
-    <span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">b"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span>
+    <span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="string_literal">\xFF</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span> <span class="comment">// invalid non-UTF8 escape sequences</span>
+    <span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">b"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="escape_sequence">\xFF</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span> <span class="comment">// valid bytes</span>
     <span class="keyword">let</span> <span class="variable declaration reference">backslash</span> <span class="operator">=</span> <span class="string_literal">r"\\"</span><span class="semicolon">;</span>
 
     <span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"</span><span class="format_specifier">{</span><span class="escape_sequence">\x41</span><span class="format_specifier">}</span><span class="string_literal macro">"</span><span class="comma macro">,</span> <span class="none macro">A</span> <span class="operator macro">=</span> <span class="numeric_literal macro">92</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
diff --git a/crates/ide/src/syntax_highlighting/tests.rs b/crates/ide/src/syntax_highlighting/tests.rs
index 1ee451a06d0..80a49bcaa3b 100644
--- a/crates/ide/src/syntax_highlighting/tests.rs
+++ b/crates/ide/src/syntax_highlighting/tests.rs
@@ -451,6 +451,8 @@ fn main() {
     let a = '\x65';
     let a = '\x00';
 
+    let a = b'\xFF';
+
     println!("Hello {{Hello}}");
     // from https://doc.rust-lang.org/std/fmt/index.html
     println!("Hello");                 // => "Hello"
@@ -505,8 +507,8 @@ fn main() {
     println!("Hello\nWorld");
     println!("\u{48}\x65\x6C\x6C\x6F World");
 
-    let _ = "\x28\x28\x00\x63\n";
-    let _ = b"\x28\x28\x00\x63\n";
+    let _ = "\x28\x28\x00\x63\xFF\n"; // invalid non-UTF8 escape sequences
+    let _ = b"\x28\x28\x00\x63\xFF\n"; // valid bytes
     let backslash = r"\\";
 
     println!("{\x41}", A = 92);
diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs
index 090eb89f470..aa8c9bbc0f8 100644
--- a/crates/syntax/src/ast/token_ext.rs
+++ b/crates/syntax/src/ast/token_ext.rs
@@ -146,6 +146,7 @@ impl QuoteOffsets {
 
 pub trait IsString: AstToken {
     const RAW_PREFIX: &'static str;
+    const MODE: Mode;
     fn is_raw(&self) -> bool {
         self.text().starts_with(Self::RAW_PREFIX)
     }
@@ -181,7 +182,7 @@ pub trait IsString: AstToken {
         let text = &self.text()[text_range_no_quotes - start];
         let offset = text_range_no_quotes.start() - start;
 
-        unescape_literal(text, Mode::Str, &mut |range, unescaped_char| {
+        unescape_literal(text, Self::MODE, &mut |range, unescaped_char| {
             let text_range =
                 TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
             cb(text_range + offset, unescaped_char);
@@ -196,6 +197,7 @@ pub trait IsString: AstToken {
 
 impl IsString for ast::String {
     const RAW_PREFIX: &'static str = "r";
+    const MODE: Mode = Mode::Str;
 }
 
 impl ast::String {
@@ -213,7 +215,7 @@ impl ast::String {
         let mut buf = String::new();
         let mut prev_end = 0;
         let mut has_error = false;
-        unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match (
+        unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match (
             unescaped_char,
             buf.capacity() == 0,
         ) {
@@ -239,6 +241,7 @@ impl ast::String {
 
 impl IsString for ast::ByteString {
     const RAW_PREFIX: &'static str = "br";
+    const MODE: Mode = Mode::ByteStr;
 }
 
 impl ast::ByteString {
@@ -256,7 +259,7 @@ impl ast::ByteString {
         let mut buf: Vec<u8> = Vec::new();
         let mut prev_end = 0;
         let mut has_error = false;
-        unescape_literal(text, Mode::ByteStr, &mut |char_range, unescaped_char| match (
+        unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match (
             unescaped_char,
             buf.capacity() == 0,
         ) {
@@ -282,6 +285,9 @@ impl ast::ByteString {
 
 impl IsString for ast::CString {
     const RAW_PREFIX: &'static str = "cr";
+    // XXX: `Mode::CStr` is not supported by `unescape_literal` of ra-ap-rustc_lexer yet.
+    // Here we pretend it to be a byte string.
+    const MODE: Mode = Mode::ByteStr;
 }
 
 impl ast::CString {
@@ -299,7 +305,7 @@ impl ast::CString {
         let mut buf = String::new();
         let mut prev_end = 0;
         let mut has_error = false;
-        unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match (
+        unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match (
             unescaped_char,
             buf.capacity() == 0,
         ) {