diff options
| author | Loïc BRANSTETT <lolo.branstett@numericable.fr> | 2022-05-01 19:25:14 +0200 |
|---|---|---|
| committer | Loïc BRANSTETT <lolo.branstett@numericable.fr> | 2022-05-01 19:25:14 +0200 |
| commit | a98abe83eb42b2f537e8a2d7706ba08d9e296f31 (patch) | |
| tree | 5f267b53c4ed408c056f8273f6f8f80c70c57312 /library/alloc/src | |
| parent | 0e7915d11f6888f005e78c2358fcdc48ff655753 (diff) | |
| download | rust-a98abe83eb42b2f537e8a2d7706ba08d9e296f31.tar.gz rust-a98abe83eb42b2f537e8a2d7706ba08d9e296f31.zip | |
Improve codegen of String::retain method.
Using unwrap_unchecked helps the optimizer avoid generating a panicking path that will never be taken for a valid UTF-8 string. Using encode_utf8 saves us a call to memcpy: the optimizer is unable to realize that ch_len <= 4, whereas with encode_utf8 it can generate much better assembly code. https://rust.godbolt.org/z/z73ohenfc
Diffstat (limited to 'library/alloc/src')
| -rw-r--r-- | library/alloc/src/string.rs | 25 |
1 files changed, 17 insertions, 8 deletions
diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs index e97c1637fd5..71e6f0450e6 100644 --- a/library/alloc/src/string.rs +++ b/library/alloc/src/string.rs @@ -1466,19 +1466,28 @@ impl String { let mut guard = SetLenOnDrop { s: self, idx: 0, del_bytes: 0 }; while guard.idx < len { - let ch = unsafe { guard.s.get_unchecked(guard.idx..len).chars().next().unwrap() }; + let ch = + // SAFETY: `guard.idx` is positive-or-zero and less that len so the `get_unchecked` + // is in bound. `self` is valid UTF-8 like string and the returned slice starts at + // a unicode code point so the `Chars` always return one character. + unsafe { guard.s.get_unchecked(guard.idx..len).chars().next().unwrap_unchecked() }; let ch_len = ch.len_utf8(); if !f(ch) { guard.del_bytes += ch_len; } else if guard.del_bytes > 0 { - unsafe { - ptr::copy( - guard.s.vec.as_ptr().add(guard.idx), - guard.s.vec.as_mut_ptr().add(guard.idx - guard.del_bytes), - ch_len, - ); - } + // SAFETY: `guard.idx` is in bound and `guard.del_bytes` represent the number of + // bytes that are erased from the string so the resulting `guard.idx - + // guard.del_bytes` always represent a valid unicode code point. + // + // `guard.del_bytes` >= `ch.len_utf8()`, so taking a slice with `ch.len_utf8()` len + // is safe. + ch.encode_utf8(unsafe { + crate::slice::from_raw_parts_mut( + guard.s.as_mut_ptr().add(guard.idx - guard.del_bytes), + ch.len_utf8(), + ) + }); } // Point idx to the next char |
