| author | bors <bors@rust-lang.org> | 2025-04-09 17:58:25 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2025-04-09 17:58:25 +0000 |
| commit | 934880f586f6ac1f952c7090e2a943fcd7775e7b (patch) | |
| tree | 3091aa692f773533762cee3882a837248246f0a3 /library/alloc/src/string.rs | |
| parent | 48f89e7659678f91a68c0c2d868180a0036ab32d (diff) | |
| parent | ff248de852c46fc0fccd31e466b1f2137381d1d6 (diff) | |
| download | rust-934880f586f6ac1f952c7090e2a943fcd7775e7b.tar.gz, rust-934880f586f6ac1f952c7090e2a943fcd7775e7b.zip | |
Auto merge of #124810 - lincot:speed-up-string-push-and-string-insert, r=tgross35
speed up `String::push` and `String::insert`

Addresses the concerns described in #116235. The performance gain comes mainly from avoiding temporary buffers.

Complex pattern matching in `encode_utf8` (introduced in #67569) has been simplified to a comparison and an exhaustive `match` in the `encode_utf8_raw_unchecked` helper function. It takes a slice of `MaybeUninit<u8>` because otherwise we'd have to construct a normal slice to uninitialized data, which is undesirable. Several functions still have that [unneeded zeroing](https://rust.godbolt.org/z/5oKfMPo7j), but a single instruction is not that important.

`@rustbot` label T-libs C-optimization A-str
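The idea behind the change is to encode the `char` directly into memory that `reserve` just handed back, instead of round-tripping through a temporary `[0; 4]` buffer. Below is a minimal sketch of that technique using only stable APIs; `encode_utf8_sketch` and `push_char` are hypothetical names, and the real `encode_utf8_raw_unchecked` internals and signature may differ from this illustration.

```rust
use core::mem::MaybeUninit;

// Standard UTF-8 byte tags.
const TAG_CONT: u8 = 0b1000_0000;
const TAG_TWO: u8 = 0b1100_0000;
const TAG_THREE: u8 = 0b1110_0000;
const TAG_FOUR: u8 = 0b1111_0000;

// Encode `ch` into the first `ch.len_utf8()` bytes of `dst` without reading or
// zeroing them first, returning the number of bytes written. Panics if `dst`
// is too short. (Hypothetical stand-in for `encode_utf8_raw_unchecked`.)
fn encode_utf8_sketch(ch: char, dst: &mut [MaybeUninit<u8>]) -> usize {
    let code = ch as u32;
    let len = ch.len_utf8();
    // `len_utf8` picks the byte count; an exhaustive `match` then writes exactly that many bytes.
    match len {
        1 => {
            dst[0].write(code as u8);
        }
        2 => {
            dst[0].write((code >> 6) as u8 | TAG_TWO);
            dst[1].write((code & 0x3F) as u8 | TAG_CONT);
        }
        3 => {
            dst[0].write((code >> 12) as u8 | TAG_THREE);
            dst[1].write((code >> 6 & 0x3F) as u8 | TAG_CONT);
            dst[2].write((code & 0x3F) as u8 | TAG_CONT);
        }
        _ => {
            dst[0].write((code >> 18) as u8 | TAG_FOUR);
            dst[1].write((code >> 12 & 0x3F) as u8 | TAG_CONT);
            dst[2].write((code >> 6 & 0x3F) as u8 | TAG_CONT);
            dst[3].write((code & 0x3F) as u8 | TAG_CONT);
        }
    }
    len
}

// Hypothetical equivalent of the new `String::push` body, on a raw byte vector:
// reserve, encode into the spare capacity, then bump the length.
fn push_char(buf: &mut Vec<u8>, ch: char) {
    let len = buf.len();
    let ch_len = ch.len_utf8();
    buf.reserve(ch_len);
    let written = encode_utf8_sketch(ch, buf.spare_capacity_mut());
    debug_assert_eq!(written, ch_len);
    // SAFETY: `ch_len` bytes of spare capacity were just initialized with valid UTF-8.
    unsafe { buf.set_len(len + ch_len) };
}

fn main() {
    let mut buf = b"na\xc3\xafve".to_vec();
    push_char(&mut buf, '!');
    assert_eq!(String::from_utf8(buf).unwrap(), "naïve!");
}
```

The point of taking `MaybeUninit<u8>` is visible in the sketch: the spare capacity handed out by `reserve` is never zeroed, and the encoder initializes exactly the bytes it needs before `set_len` exposes them.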
Diffstat (limited to 'library/alloc/src/string.rs')
| -rw-r--r-- | library/alloc/src/string.rs | 65 |
1 file changed, 47 insertions, 18 deletions
```diff
diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
index 9236f5cb8d1..5193a0b2d4b 100644
--- a/library/alloc/src/string.rs
+++ b/library/alloc/src/string.rs
@@ -1401,11 +1401,14 @@ impl String {
     #[inline]
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn push(&mut self, ch: char) {
-        match ch.len_utf8() {
-            1 => self.vec.push(ch as u8),
-            _ => {
-                self.vec.extend_from_slice(ch.encode_utf8(&mut [0; char::MAX_LEN_UTF8]).as_bytes())
-            }
+        let len = self.len();
+        let ch_len = ch.len_utf8();
+        self.reserve(ch_len);
+
+        // SAFETY: Just reserved capacity for at least the length needed to encode `ch`.
+        unsafe {
+            core::char::encode_utf8_raw_unchecked(ch as u32, self.vec.as_mut_ptr().add(self.len()));
+            self.vec.set_len(len + ch_len);
         }
     }
 
@@ -1702,24 +1705,31 @@ impl String {
     #[rustc_confusables("set")]
     pub fn insert(&mut self, idx: usize, ch: char) {
         assert!(self.is_char_boundary(idx));
-        let mut bits = [0; char::MAX_LEN_UTF8];
-        let bits = ch.encode_utf8(&mut bits).as_bytes();
 
+        let len = self.len();
+        let ch_len = ch.len_utf8();
+        self.reserve(ch_len);
+
+        // SAFETY: Move the bytes starting from `idx` to their new location `ch_len`
+        // bytes ahead. This is safe because sufficient capacity was reserved, and `idx`
+        // is a char boundary.
         unsafe {
-            self.insert_bytes(idx, bits);
+            ptr::copy(
+                self.vec.as_ptr().add(idx),
+                self.vec.as_mut_ptr().add(idx + ch_len),
+                len - idx,
+            );
         }
-    }
 
-    #[cfg(not(no_global_oom_handling))]
-    unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) {
-        let len = self.len();
-        let amt = bytes.len();
-        self.vec.reserve(amt);
+        // SAFETY: Encode the character into the vacated region if `idx != len`,
+        // or into the uninitialized spare capacity otherwise.
+        unsafe {
+            core::char::encode_utf8_raw_unchecked(ch as u32, self.vec.as_mut_ptr().add(idx));
+        }
 
+        // SAFETY: Update the length to include the newly added bytes.
         unsafe {
-            ptr::copy(self.vec.as_ptr().add(idx), self.vec.as_mut_ptr().add(idx + amt), len - idx);
-            ptr::copy_nonoverlapping(bytes.as_ptr(), self.vec.as_mut_ptr().add(idx), amt);
-            self.vec.set_len(len + amt);
+            self.vec.set_len(len + ch_len);
         }
     }
 
@@ -1749,8 +1759,27 @@ impl String {
     pub fn insert_str(&mut self, idx: usize, string: &str) {
         assert!(self.is_char_boundary(idx));
 
+        let len = self.len();
+        let amt = string.len();
+        self.reserve(amt);
+
+        // SAFETY: Move the bytes starting from `idx` to their new location `amt` bytes
+        // ahead. This is safe because sufficient capacity was just reserved, and `idx`
+        // is a char boundary.
+        unsafe {
+            ptr::copy(self.vec.as_ptr().add(idx), self.vec.as_mut_ptr().add(idx + amt), len - idx);
+        }
+
+        // SAFETY: Copy the new string slice into the vacated region if `idx != len`,
+        // or into the uninitialized spare capacity otherwise. The borrow checker
+        // ensures that the source and destination do not overlap.
+        unsafe {
+            ptr::copy_nonoverlapping(string.as_ptr(), self.vec.as_mut_ptr().add(idx), amt);
+        }
+
+        // SAFETY: Update the length to include the newly added bytes.
         unsafe {
-            self.insert_bytes(idx, string.as_bytes());
+            self.vec.set_len(len + amt);
         }
     }
 
```
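The `insert` and `insert_str` hunks open-code the shift-then-write sequence that the removed `insert_bytes` helper used to perform: move the tail forward with an overlapping copy, fill the vacated gap, then bump the length. As a standalone illustration of that pattern (not the library code itself), here is a minimal sketch on a plain `Vec<u8>`; the helper name `insert_bytes_at` is made up for this example.

```rust
use std::ptr;

// Shift-then-write insertion into a byte vector, mirroring the pattern used by
// the new `String::insert_str`. `insert_bytes_at` is a hypothetical helper.
fn insert_bytes_at(buf: &mut Vec<u8>, idx: usize, bytes: &[u8]) {
    assert!(idx <= buf.len());
    let len = buf.len();
    let amt = bytes.len();
    buf.reserve(amt);

    unsafe {
        // Move the tail `amt` bytes ahead; source and destination may overlap,
        // so the memmove-style `ptr::copy` is required.
        ptr::copy(buf.as_ptr().add(idx), buf.as_mut_ptr().add(idx + amt), len - idx);
        // Fill the vacated gap; these two regions cannot overlap.
        ptr::copy_nonoverlapping(bytes.as_ptr(), buf.as_mut_ptr().add(idx), amt);
        // Every byte up to `len + amt` is now initialized.
        buf.set_len(len + amt);
    }
}

fn main() {
    let mut v = b"hello world".to_vec();
    insert_bytes_at(&mut v, 5, b",");
    assert_eq!(v, b"hello, world".to_vec());
}
```

`ptr::copy` is needed for the first step because the tail overlaps its destination whenever `idx < len`, while the gap fill can use the cheaper `ptr::copy_nonoverlapping`, since a borrowed `&[u8]` argument cannot alias the vector's own buffer.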
