about summary refs log tree commit diff
path: root/library/alloc/src
diff options
context:
space:
mode:
authorLoïc BRANSTETT <lolo.branstett@numericable.fr>2022-05-01 19:25:14 +0200
committerLoïc BRANSTETT <lolo.branstett@numericable.fr>2022-05-01 19:25:14 +0200
commita98abe83eb42b2f537e8a2d7706ba08d9e296f31 (patch)
tree5f267b53c4ed408c056f8273f6f8f80c70c57312 /library/alloc/src
parent0e7915d11f6888f005e78c2358fcdc48ff655753 (diff)
downloadrust-a98abe83eb42b2f537e8a2d7706ba08d9e296f31.tar.gz
rust-a98abe83eb42b2f537e8a2d7706ba08d9e296f31.zip
Improve codegen of String::retain method.
Using unwrap_unchecked helps the optimizer avoid generating the panicking
path, which will never be taken for a valid UTF-8 string.

Using encode_utf8 saves us a call to memcpy: with ptr::copy the optimizer
is unable to realize that ch_len <= 4, whereas with encode_utf8 it can
generate much better assembly code.

https://rust.godbolt.org/z/z73ohenfc
Diffstat (limited to 'library/alloc/src')
-rw-r--r--library/alloc/src/string.rs25
1 files changed, 17 insertions, 8 deletions
diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
index e97c1637fd5..71e6f0450e6 100644
--- a/library/alloc/src/string.rs
+++ b/library/alloc/src/string.rs
@@ -1466,19 +1466,28 @@ impl String {
         let mut guard = SetLenOnDrop { s: self, idx: 0, del_bytes: 0 };
 
         while guard.idx < len {
-            let ch = unsafe { guard.s.get_unchecked(guard.idx..len).chars().next().unwrap() };
+            let ch =
+                // SAFETY: `guard.idx` is non-negative and less than `len`, so the `get_unchecked`
+                // is in bounds. `self` is a valid UTF-8 string and the returned slice starts at
+                // a unicode code point, so the `Chars` always returns at least one character.
+                unsafe { guard.s.get_unchecked(guard.idx..len).chars().next().unwrap_unchecked() };
             let ch_len = ch.len_utf8();
 
             if !f(ch) {
                 guard.del_bytes += ch_len;
             } else if guard.del_bytes > 0 {
-                unsafe {
-                    ptr::copy(
-                        guard.s.vec.as_ptr().add(guard.idx),
-                        guard.s.vec.as_mut_ptr().add(guard.idx - guard.del_bytes),
-                        ch_len,
-                    );
-                }
+                // SAFETY: `guard.idx` is in bounds and `guard.del_bytes` represents the number of
+                // bytes that are erased from the string, so the resulting `guard.idx -
+                // guard.del_bytes` always represents a valid unicode code point boundary.
+                //
+                // `guard.del_bytes > 0` and `guard.idx + ch.len_utf8() <= len`, so taking a slice
+                // with `ch.len_utf8()` len stays in bounds and is safe.
+                ch.encode_utf8(unsafe {
+                    crate::slice::from_raw_parts_mut(
+                        guard.s.as_mut_ptr().add(guard.idx - guard.del_bytes),
+                        ch.len_utf8(),
+                    )
+                });
             }
 
             // Point idx to the next char