about summary refs log tree commit diff
diff options
context:
space:
mode:
authorTamir Duberstein <tamird@google.com>2021-03-26 10:39:42 -0400
committerTamir Duberstein <tamird@google.com>2021-06-06 08:06:56 -0400
commit977903bb1135e22dc7c33aee0096c383f93aa719 (patch)
tree4efe1a16a6db0b634b576246898dcee3d831c8da
parent38013e708eb6ec5abe9992078857b147ff3b58d5 (diff)
downloadrust-977903bb1135e22dc7c33aee0096c383f93aa719.tar.gz
rust-977903bb1135e22dc7c33aee0096c383f93aa719.zip
String::remove_matches O(n^2) -> O(n)
Copy only non-matching bytes.
-rw-r--r--library/alloc/src/string.rs53
1 files changed, 38 insertions, 15 deletions
diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
index 4aa2be2b178..1fb645a302e 100644
--- a/library/alloc/src/string.rs
+++ b/library/alloc/src/string.rs
@@ -1290,26 +1290,49 @@ impl String {
     {
         use core::str::pattern::Searcher;
 
-        let matches: Vec<_> = {
+        let rejections = {
             let mut searcher = pat.into_searcher(self);
-            from_fn(|| searcher.next_match()).collect()
+            // Per Searcher::next:
+            //
+            // A Match result needs to contain the whole matched pattern,
+            // however Reject results may be split up into arbitrary many
+            // adjacent fragments. Both ranges may have zero length.
+            //
+            // In practice the implementation of Searcher::next_match tends to
+            // be more efficient, so we use it here and do some work to invert
+            // matches into rejections since that's what we want to copy below.
+            let mut front = 0;
+            let rejections: Vec<_> = from_fn(|| {
+                let (start, end) = searcher.next_match()?;
+                let prev_front = front;
+                front = end;
+                Some((prev_front, start))
+            })
+            .collect();
+            rejections.into_iter().chain(core::iter::once((front, self.len())))
         };
 
-        let len = self.len();
-        let mut shrunk_by = 0;
+        let mut len = 0;
+        let ptr = self.vec.as_mut_ptr();
+
+        for (start, end) in rejections {
+            let count = end - start;
+            if start != len {
+                // SAFETY: per Searcher::next:
+                //
+                // The stream of Match and Reject values up to a Done will
+                // contain index ranges that are adjacent, non-overlapping,
+                // covering the whole haystack, and laying on utf8
+                // boundaries.
+                unsafe {
+                    ptr::copy(ptr.add(start), ptr.add(len), count);
+                }
+            }
+            len += count;
+        }
 
-        // SAFETY: start and end will be on utf8 byte boundaries per
-        // the Searcher docs
         unsafe {
-            for (start, end) in matches {
-                ptr::copy(
-                    self.vec.as_mut_ptr().add(end - shrunk_by),
-                    self.vec.as_mut_ptr().add(start - shrunk_by),
-                    len - end,
-                );
-                shrunk_by += end - start;
-            }
-            self.vec.set_len(len - shrunk_by);
+            self.vec.set_len(len);
         }
     }