about summary refs log tree commit diff
path: root/compiler/rustc_ast/src/util/unicode.rs
diff options
context:
space:
mode:
author: Hans Kratz <hans@appfour.com> 2021-11-04 23:31:42 +0100
committer: Hans Kratz <hans@appfour.com> 2021-11-04 23:31:42 +0100
commit: 7885233df01abf51d2947b6b466a17a1843b2a60 (patch)
tree: 8483003df35a60ab830520c724da48e024ea4efe /compiler/rustc_ast/src/util/unicode.rs
parent: a5b25a2cfa1adb52723fa4a5b458dd9d6272117a (diff)
download: rust-7885233df01abf51d2947b6b466a17a1843b2a60.tar.gz
rust-7885233df01abf51d2947b6b466a17a1843b2a60.zip
Optimize literal, doc comment lint as well, extract function.
Diffstat (limited to 'compiler/rustc_ast/src/util/unicode.rs')
-rw-r--r-- compiler/rustc_ast/src/util/unicode.rs 44
1 files changed, 44 insertions, 0 deletions
diff --git a/compiler/rustc_ast/src/util/unicode.rs b/compiler/rustc_ast/src/util/unicode.rs
new file mode 100644
index 00000000000..ad73d6e4fe2
--- /dev/null
+++ b/compiler/rustc_ast/src/util/unicode.rs
@@ -0,0 +1,44 @@
+/// Unicode bidirectional formatting characters: the embedding, override and isolate
+/// initiators, followed by the two characters that terminate them.
+pub const TEXT_FLOW_CONTROL_CHARS: &[char] = &[
+    '\u{202A}', // LEFT-TO-RIGHT EMBEDDING
+    '\u{202B}', // RIGHT-TO-LEFT EMBEDDING
+    '\u{202D}', // LEFT-TO-RIGHT OVERRIDE
+    '\u{202E}', // RIGHT-TO-LEFT OVERRIDE
+    '\u{2066}', // LEFT-TO-RIGHT ISOLATE
+    '\u{2067}', // RIGHT-TO-LEFT ISOLATE
+    '\u{2068}', // FIRST STRONG ISOLATE
+    '\u{202C}', // POP DIRECTIONAL FORMATTING
+    '\u{2069}', // POP DIRECTIONAL ISOLATE
+];
+
+/// Returns `true` if `s` contains at least one of the [`TEXT_FLOW_CONTROL_CHARS`].
+///
+/// All nine of those characters are encoded in UTF-8 as three bytes whose lead byte
+/// is `0xE2`, so the scan hops from one `0xE2` byte to the next and inspects only
+/// the two bytes that follow it, rather than decoding `s` one `char` at a time.
+#[inline]
+pub fn contains_text_flow_control_chars(s: &str) -> bool {
+    // UTF-8 encodings of the characters being searched for:
+    //
+    //   U+202A  E2 80 AA
+    //   U+202B  E2 80 AB
+    //   U+202C  E2 80 AC
+    //   U+202D  E2 80 AD
+    //   U+202E  E2 80 AE
+    //   U+2066  E2 81 A6
+    //   U+2067  E2 81 A7
+    //   U+2068  E2 81 A8
+    //   U+2069  E2 81 A9
+    let mut rest = s.as_bytes();
+    while let Some(lead) = core::slice::memchr::memchr(0xE2, rest) {
+        // `s` is valid UTF-8, where 0xE2 can only be the lead byte of a three-byte
+        // sequence, so the two bytes after it are always present.
+        let seq = &rest[lead..lead + 3];
+        if matches!(seq, [_, 0x80, 0xAA..=0xAE] | [_, 0x81, 0xA6..=0xA9]) {
+            return true;
+        }
+        // Some other three-byte character: resume the search right after it.
+        rest = &rest[lead + 3..];
+    }
+    false
+}