diff options
| author | Hans Kratz <hans@appfour.com> | 2021-11-04 23:31:42 +0100 |
|---|---|---|
| committer | Hans Kratz <hans@appfour.com> | 2021-11-04 23:31:42 +0100 |
| commit | 7885233df01abf51d2947b6b466a17a1843b2a60 (patch) | |
| tree | 8483003df35a60ab830520c724da48e024ea4efe /compiler/rustc_ast/src/util/unicode.rs | |
| parent | a5b25a2cfa1adb52723fa4a5b458dd9d6272117a (diff) | |
| download | rust-7885233df01abf51d2947b6b466a17a1843b2a60.tar.gz rust-7885233df01abf51d2947b6b466a17a1843b2a60.zip | |
Optimize literal, doc comment lint as well, extract function.
Diffstat (limited to 'compiler/rustc_ast/src/util/unicode.rs')
| -rw-r--r-- | compiler/rustc_ast/src/util/unicode.rs | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/compiler/rustc_ast/src/util/unicode.rs b/compiler/rustc_ast/src/util/unicode.rs new file mode 100644 index 00000000000..ad73d6e4fe2 --- /dev/null +++ b/compiler/rustc_ast/src/util/unicode.rs @@ -0,0 +1,44 @@ +pub const TEXT_FLOW_CONTROL_CHARS: &[char] = &[ + '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{202C}', + '\u{2069}', +]; + +#[inline] +pub fn contains_text_flow_control_chars(s: &str) -> bool { + // Char - UTF-8 + // U+202A - E2 80 AA + // U+202B - E2 80 AB + // U+202C - E2 80 AC + // U+202D - E2 80 AD + // U+202E - E2 80 AE + // U+2066 - E2 81 A6 + // U+2067 - E2 81 A7 + // U+2068 - E2 81 A8 + // U+2069 - E2 81 A9 + let mut bytes = s.as_bytes(); + loop { + match core::slice::memchr::memchr(0xE2, &bytes) { + Some(idx) => { + // bytes are valid UTF-8 -> E2 must be followed by two bytes + let ch = &bytes[idx..idx + 3]; + match ch[1] { + 0x80 => { + if (0xAA..=0xAE).contains(&ch[2]) { + break true; + } + } + 0x81 => { + if (0xA6..=0xA9).contains(&ch[2]) { + break true; + } + } + _ => {} + } + bytes = &bytes[idx + 3..]; + } + None => { + break false; + } + } + } +} |
