From caf8bcceff301b4fa3414e3f21813581a8d758a3 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 7 May 2022 09:34:57 -0700 Subject: Optimize `Wtf8Buf::into_string` for the case where it contains UTF-8. Add a `is_known_utf8` flag to `Wtf8Buf`, which tracks whether the string is known to contain UTF-8. This is efficiently computed in many common situations, such as when a `Wtf8Buf` is constructed from a `String` or `&str`, or with `Wtf8Buf::from_wide` which is already doing UTF-16 decoding and already checking for surrogates. This makes `OsString::into_string` O(1) rather than O(N) on Windows in common cases. And, it eliminates the need to scan through the string for surrogates in `Args::next` and `Vars::next`, because the strings are already being translated with `Wtf8Buf::from_wide`. Many things on Windows construct `OsString`s with `Wtf8Buf::from_wide`, such as `DirEntry::file_name` and `fs::read_link`, so with this patch, users of those functions can subsequently call `.into_string()` without paying for an extra scan through the string for surrogates. --- library/std/src/sys/windows/os_str.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'library/std/src/sys') diff --git a/library/std/src/sys/windows/os_str.rs b/library/std/src/sys/windows/os_str.rs index 11883f15022..4bdd8c505ff 100644 --- a/library/std/src/sys/windows/os_str.rs +++ b/library/std/src/sys/windows/os_str.rs @@ -164,9 +164,7 @@ impl Slice { } pub fn to_owned(&self) -> Buf { - let mut buf = Wtf8Buf::with_capacity(self.inner.len()); - buf.push_wtf8(&self.inner); - Buf { inner: buf } + Buf { inner: self.inner.to_owned() } } pub fn clone_into(&self, buf: &mut Buf) { -- cgit 1.4.1-3-g733a5