about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2022-05-31 09:34:00 +0000
committer bors <bors@rust-lang.org> 2022-05-31 09:34:00 +0000
commit dcbd5f5134e396353207f1f79e50794b83cee7d2 (patch)
tree ad8a495092c69694241f4b3de984d52ad0eb79bf
parent d35d972e6974d40d30362344ea619a5b560aae20 (diff)
parent 3358a41acbcb82daf3cd71197b2a053152a9a376 (diff)
download rust-dcbd5f5134e396353207f1f79e50794b83cee7d2.tar.gz
rust-dcbd5f5134e396353207f1f79e50794b83cee7d2.zip
Auto merge of #97526 - Nilstrieb:unicode-is-printable-fastpath, r=joshtriplett
Add ASCII fast path to `is_printable`

Before, it would enter the full, expensive check even for ordinary ASCII characters. Now, it skips the check for the ASCII characters in `32..127`. This range was verified manually against the current behavior.

I ran the `tracing` test suite in Miri, and it was really slow. I looked at a profile, and Miri spent most of the time in `core::char::methods::escape_debug_ext`, with about half of that time spent in `core::unicode::printable::is_printable`. So I optimized it here.

The tracing profile:
![The tracing profile](https://user-images.githubusercontent.com/48135649/170883650-23876e7b-3fd1-4e8b-9001-47672e06d914.svg)
-rw-r--r-- library/core/benches/fmt.rs 11
-rwxr-xr-x library/core/src/unicode/printable.py 11
-rw-r--r-- library/core/src/unicode/printable.rs 11
3 files changed, 29 insertions, 4 deletions
diff --git a/library/core/benches/fmt.rs b/library/core/benches/fmt.rs
index 9df66263459..ff726ff7559 100644
--- a/library/core/benches/fmt.rs
+++ b/library/core/benches/fmt.rs
@@ -110,6 +110,17 @@ fn write_str_macro_debug(bh: &mut Bencher) {
 }
 
 #[bench]
+fn write_str_macro_debug_ascii(bh: &mut Bencher) {
+    bh.iter(|| {
+        let mut mem = String::new();
+        let wr = &mut mem as &mut dyn fmt::Write;
+        for _ in 0..1000 {
+            write!(wr, "{:?}", "Hello, World!").unwrap();
+        }
+    });
+}
+
+#[bench]
 fn write_u128_max(bh: &mut Bencher) {
     bh.iter(|| {
         test::black_box(format!("{}", u128::MAX));
diff --git a/library/core/src/unicode/printable.py b/library/core/src/unicode/printable.py
index c42850d2324..7c37f5f099c 100755
--- a/library/core/src/unicode/printable.py
+++ b/library/core/src/unicode/printable.py
@@ -170,7 +170,7 @@ def main():
     normal1 = compress_normal(normal1)
 
     print("""\
-// NOTE: The following code was generated by "src/libcore/unicode/printable.py",
+// NOTE: The following code was generated by "library/core/src/unicode/printable.py",
 //       do not edit directly!
 
 fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &[u8]) -> bool {
@@ -211,7 +211,14 @@ fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &
 pub(crate) fn is_printable(x: char) -> bool {
     let x = x as u32;
     let lower = x as u16;
-    if x < 0x10000 {
+
+    if x < 32 {
+        // ASCII fast path
+        false
+    } else if x < 127 {
+        // ASCII fast path
+        true
+    } else if x < 0x10000 {
         check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
     } else if x < 0x20000 {
         check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)
diff --git a/library/core/src/unicode/printable.rs b/library/core/src/unicode/printable.rs
index 1502b3160bc..31cf88a4149 100644
--- a/library/core/src/unicode/printable.rs
+++ b/library/core/src/unicode/printable.rs
@@ -1,4 +1,4 @@
-// NOTE: The following code was generated by "src/libcore/unicode/printable.py",
+// NOTE: The following code was generated by "library/core/src/unicode/printable.py",
 //       do not edit directly!
 
 fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &[u8]) -> bool {
@@ -39,7 +39,14 @@ fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &
 pub(crate) fn is_printable(x: char) -> bool {
     let x = x as u32;
     let lower = x as u16;
-    if x < 0x10000 {
+
+    if x < 32 {
+        // ASCII fast path
+        false
+    } else if x < 127 {
+        // ASCII fast path
+        true
+    } else if x < 0x10000 {
         check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
     } else if x < 0x20000 {
         check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)