diff options
| author | Murarth <murarth@gmail.com> | 2017-06-13 15:52:59 -0700 |
|---|---|---|
| committer | Murarth <murarth@gmail.com> | 2017-06-13 23:37:34 -0700 |
| commit | eadda7665eb31b1e7cb94a503b4d5cf5c75474c0 (patch) | |
| tree | 406691dc732c762e1424f5110fcbfca97f0b1302 /src/liballoc/string.rs | |
| parent | e40ef964fe491b19c22dfb8dd36d1eab14223c36 (diff) | |
| download | rust-eadda7665eb31b1e7cb94a503b4d5cf5c75474c0.tar.gz rust-eadda7665eb31b1e7cb94a503b4d5cf5c75474c0.zip | |
Merge crate `collections` into `alloc`
Diffstat (limited to 'src/liballoc/string.rs')
| -rw-r--r-- | src/liballoc/string.rs | 2287 |
1 files changed, 2287 insertions, 0 deletions
diff --git a/src/liballoc/string.rs b/src/liballoc/string.rs new file mode 100644 index 00000000000..1d98626e90b --- /dev/null +++ b/src/liballoc/string.rs @@ -0,0 +1,2287 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! A UTF-8 encoded, growable string. +//! +//! This module contains the [`String`] type, a trait for converting +//! [`ToString`]s, and several error types that may result from working with +//! [`String`]s. +//! +//! [`ToString`]: trait.ToString.html +//! +//! # Examples +//! +//! There are multiple ways to create a new [`String`] from a string literal: +//! +//! ``` +//! let s = "Hello".to_string(); +//! +//! let s = String::from("world"); +//! let s: String = "also this".into(); +//! ``` +//! +//! You can create a new [`String`] from an existing one by concatenating with +//! `+`: +//! +//! [`String`]: struct.String.html +//! +//! ``` +//! let s = "Hello".to_string(); +//! +//! let message = s + " world!"; +//! ``` +//! +//! If you have a vector of valid UTF-8 bytes, you can make a `String` out of +//! it. You can do the reverse too. +//! +//! ``` +//! let sparkle_heart = vec![240, 159, 146, 150]; +//! +//! // We know these bytes are valid, so we'll use `unwrap()`. +//! let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); +//! +//! assert_eq!("💖", sparkle_heart); +//! +//! let bytes = sparkle_heart.into_bytes(); +//! +//! assert_eq!(bytes, [240, 159, 146, 150]); +//! ``` + +#![stable(feature = "rust1", since = "1.0.0")] + +use core::fmt; +use core::hash; +use core::iter::{FromIterator, FusedIterator}; +use core::ops::{self, Add, AddAssign, Index, IndexMut}; +use core::ptr; +use core::str as core_str; +use core::str::pattern::Pattern; +use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER}; + +use borrow::{Cow, ToOwned}; +use range::RangeArgument; +use Bound::{Excluded, Included, Unbounded}; +use str::{self, from_boxed_utf8_unchecked, FromStr, Utf8Error, Chars}; +use vec::Vec; +use boxed::Box; + +/// A UTF-8 encoded, growable string. +/// +/// The `String` type is the most common string type that has ownership over the +/// contents of the string. It has a close relationship with its borrowed +/// counterpart, the primitive [`str`]. +/// +/// [`str`]: ../../std/primitive.str.html +/// +/// # Examples +/// +/// You can create a `String` from a literal string with `String::from`: +/// +/// ``` +/// let hello = String::from("Hello, world!"); +/// ``` +/// +/// You can append a [`char`] to a `String` with the [`push`] method, and +/// append a [`&str`] with the [`push_str`] method: +/// +/// ``` +/// let mut hello = String::from("Hello, "); +/// +/// hello.push('w'); +/// hello.push_str("orld!"); +/// ``` +/// +/// [`char`]: ../../std/primitive.char.html +/// [`push`]: #method.push +/// [`push_str`]: #method.push_str +/// +/// If you have a vector of UTF-8 bytes, you can create a `String` from it with +/// the [`from_utf8`] method: +/// +/// ``` +/// // some bytes, in a vector +/// let sparkle_heart = vec![240, 159, 146, 150]; +/// +/// // We know these bytes are valid, so we'll use `unwrap()`. +/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); +/// +/// assert_eq!("💖", sparkle_heart); +/// ``` +/// +/// [`from_utf8`]: #method.from_utf8 +/// +/// # UTF-8 +/// +/// `String`s are always valid UTF-8. This has a few implications, the first of +/// which is that if you need a non-UTF-8 string, consider [`OsString`]. It is +/// similar, but without the UTF-8 constraint. The second implication is that +/// you cannot index into a `String`: +/// +/// ```ignore +/// let s = "hello"; +/// +/// println!("The first letter of s is {}", s[0]); // ERROR!!! +/// ``` +/// +/// [`OsString`]: ../../std/ffi/struct.OsString.html +/// +/// Indexing is intended to be a constant-time operation, but UTF-8 encoding +/// does not allow us to do this. Furthermore, it's not clear what sort of +/// thing the index should return: a byte, a codepoint, or a grapheme cluster. +/// The [`bytes`] and [`chars`] methods return iterators over the first +/// two, respectively. +/// +/// [`bytes`]: #method.bytes +/// [`chars`]: #method.chars +/// +/// # Deref +/// +/// `String`s implement [`Deref`]`<Target=str>`, and so inherit all of [`str`]'s +/// methods. In addition, this means that you can pass a `String` to any +/// function which takes a [`&str`] by using an ampersand (`&`): +/// +/// ``` +/// fn takes_str(s: &str) { } +/// +/// let s = String::from("Hello"); +/// +/// takes_str(&s); +/// ``` +/// +/// [`&str`]: ../../std/primitive.str.html +/// [`Deref`]: ../../std/ops/trait.Deref.html +/// +/// This will create a [`&str`] from the `String` and pass it in. This +/// conversion is very inexpensive, and so generally, functions will accept +/// [`&str`]s as arguments unless they need a `String` for some specific reason. +/// +/// +/// # Representation +/// +/// A `String` is made up of three components: a pointer to some bytes, a +/// length, and a capacity. The pointer points to an internal buffer `String` +/// uses to store its data. The length is the number of bytes currently stored +/// in the buffer, and the capacity is the size of the buffer in bytes. As such, +/// the length will always be less than or equal to the capacity. +/// +/// This buffer is always stored on the heap. +/// +/// You can look at these with the [`as_ptr`], [`len`], and [`capacity`] +/// methods: +/// +/// ``` +/// use std::mem; +/// +/// let story = String::from("Once upon a time..."); +/// +/// let ptr = story.as_ptr(); +/// let len = story.len(); +/// let capacity = story.capacity(); +/// +/// // story has nineteen bytes +/// assert_eq!(19, len); +/// +/// // Now that we have our parts, we throw the story away. +/// mem::forget(story); +/// +/// // We can re-build a String out of ptr, len, and capacity. This is all +/// // unsafe because we are responsible for making sure the components are +/// // valid: +/// let s = unsafe { String::from_raw_parts(ptr as *mut _, len, capacity) } ; +/// +/// assert_eq!(String::from("Once upon a time..."), s); +/// ``` +/// +/// [`as_ptr`]: #method.as_ptr +/// [`len`]: #method.len +/// [`capacity`]: #method.capacity +/// +/// If a `String` has enough capacity, adding elements to it will not +/// re-allocate. For example, consider this program: +/// +/// ``` +/// let mut s = String::new(); +/// +/// println!("{}", s.capacity()); +/// +/// for _ in 0..5 { +/// s.push_str("hello"); +/// println!("{}", s.capacity()); +/// } +/// ``` +/// +/// This will output the following: +/// +/// ```text +/// 0 +/// 5 +/// 10 +/// 20 +/// 20 +/// 40 +/// ``` +/// +/// At first, we have no memory allocated at all, but as we append to the +/// string, it increases its capacity appropriately. If we instead use the +/// [`with_capacity`] method to allocate the correct capacity initially: +/// +/// ``` +/// let mut s = String::with_capacity(25); +/// +/// println!("{}", s.capacity()); +/// +/// for _ in 0..5 { +/// s.push_str("hello"); +/// println!("{}", s.capacity()); +/// } +/// ``` +/// +/// [`with_capacity`]: #method.with_capacity +/// +/// We end up with a different output: +/// +/// ```text +/// 25 +/// 25 +/// 25 +/// 25 +/// 25 +/// 25 +/// ``` +/// +/// Here, there's no need to allocate more memory inside the loop. +#[derive(PartialOrd, Eq, Ord)] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct String { + vec: Vec<u8>, +} + +/// A possible error value when converting a `String` from a UTF-8 byte vector. +/// +/// This type is the error type for the [`from_utf8`] method on [`String`]. It +/// is designed in such a way to carefully avoid reallocations: the +/// [`into_bytes`] method will give back the byte vector that was used in the +/// conversion attempt. +/// +/// [`from_utf8`]: struct.String.html#method.from_utf8 +/// [`String`]: struct.String.html +/// [`into_bytes`]: struct.FromUtf8Error.html#method.into_bytes +/// +/// The [`Utf8Error`] type provided by [`std::str`] represents an error that may +/// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's +/// an analogue to `FromUtf8Error`, and you can get one from a `FromUtf8Error` +/// through the [`utf8_error`] method. +/// +/// [`Utf8Error`]: ../../std/str/struct.Utf8Error.html +/// [`std::str`]: ../../std/str/index.html +/// [`u8`]: ../../std/primitive.u8.html +/// [`&str`]: ../../std/primitive.str.html +/// [`utf8_error`]: #method.utf8_error +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// // some invalid bytes, in a vector +/// let bytes = vec![0, 159]; +/// +/// let value = String::from_utf8(bytes); +/// +/// assert!(value.is_err()); +/// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes()); +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Debug)] +pub struct FromUtf8Error { + bytes: Vec<u8>, + error: Utf8Error, +} + +/// A possible error value when converting a `String` from a UTF-16 byte slice. +/// +/// This type is the error type for the [`from_utf16`] method on [`String`]. +/// +/// [`from_utf16`]: struct.String.html#method.from_utf16 +/// [`String`]: struct.String.html +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// // 𝄞mu<invalid>ic +/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, +/// 0xD800, 0x0069, 0x0063]; +/// +/// assert!(String::from_utf16(v).is_err()); +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Debug)] +pub struct FromUtf16Error(()); + +impl String { + /// Creates a new empty `String`. + /// + /// Given that the `String` is empty, this will not allocate any initial + /// buffer. While that means that this initial operation is very + /// inexpensive, but may cause excessive allocation later, when you add + /// data. If you have an idea of how much data the `String` will hold, + /// consider the [`with_capacity`] method to prevent excessive + /// re-allocation. + /// + /// [`with_capacity`]: #method.with_capacity + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::new(); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new() -> String { + String { vec: Vec::new() } + } + + /// Creates a new empty `String` with a particular capacity. + /// + /// `String`s have an internal buffer to hold their data. The capacity is + /// the length of that buffer, and can be queried with the [`capacity`] + /// method. This method creates an empty `String`, but one with an initial + /// buffer that can hold `capacity` bytes. This is useful when you may be + /// appending a bunch of data to the `String`, reducing the number of + /// reallocations it needs to do. + /// + /// [`capacity`]: #method.capacity + /// + /// If the given capacity is `0`, no allocation will occur, and this method + /// is identical to the [`new`] method. + /// + /// [`new`]: #method.new + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// + /// // The String contains no chars, even though it has capacity for more + /// assert_eq!(s.len(), 0); + /// + /// // These are all done without reallocating... + /// let cap = s.capacity(); + /// for i in 0..10 { + /// s.push('a'); + /// } + /// + /// assert_eq!(s.capacity(), cap); + /// + /// // ...but this may make the vector reallocate + /// s.push('a'); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn with_capacity(capacity: usize) -> String { + String { vec: Vec::with_capacity(capacity) } + } + + // HACK(japaric): with cfg(test) the inherent `[T]::to_vec` method, which is + // required for this method definition, is not available. Since we don't + // require this method for testing purposes, I'll just stub it + // NB see the slice::hack module in slice.rs for more information + #[inline] + #[cfg(test)] + pub fn from_str(_: &str) -> String { + panic!("not available with cfg(test)"); + } + + /// Converts a vector of bytes to a `String`. + /// + /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a vector of bytes + /// ([`Vec<u8>`]) is made of bytes, so this function converts between the + /// two. Not all byte slices are valid `String`s, however: `String` + /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that + /// the bytes are valid UTF-8, and then does the conversion. + /// + /// [`&str`]: ../../std/primitive.str.html + /// [`u8`]: ../../std/primitive.u8.html + /// [`Vec<u8>`]: ../../std/vec/struct.Vec.html + /// + /// If you are sure that the byte slice is valid UTF-8, and you don't want + /// to incur the overhead of the validity check, there is an unsafe version + /// of this function, [`from_utf8_unchecked`], which has the same behavior + /// but skips the check. + /// + /// [`from_utf8_unchecked`]: struct.String.html#method.from_utf8_unchecked + /// + /// This method will take care to not copy the vector, for efficiency's + /// sake. + /// + /// If you need a `&str` instead of a `String`, consider + /// [`str::from_utf8`]. + /// + /// [`str::from_utf8`]: ../../std/str/fn.from_utf8.html + /// + /// The inverse of this method is [`as_bytes`]. + /// + /// [`as_bytes`]: #method.as_bytes + /// + /// # Errors + /// + /// Returns `Err` if the slice is not UTF-8 with a description as to why the + /// provided bytes are not UTF-8. The vector you moved in is also included. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// // We know these bytes are valid, so we'll use `unwrap()`. + /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + /// + /// Incorrect bytes: + /// + /// ``` + /// // some invalid bytes, in a vector + /// let sparkle_heart = vec![0, 159, 146, 150]; + /// + /// assert!(String::from_utf8(sparkle_heart).is_err()); + /// ``` + /// + /// See the docs for [`FromUtf8Error`] for more details on what you can do + /// with this error. + /// + /// [`FromUtf8Error`]: struct.FromUtf8Error.html + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> { + match str::from_utf8(&vec) { + Ok(..) => Ok(String { vec: vec }), + Err(e) => { + Err(FromUtf8Error { + bytes: vec, + error: e, + }) + } + } + } + + /// Converts a slice of bytes to a string, including invalid characters. + /// + /// Strings are made of bytes ([`u8`]), and a slice of bytes + /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts + /// between the two. Not all byte slices are valid strings, however: strings + /// are required to be valid UTF-8. During this conversion, + /// `from_utf8_lossy()` will replace any invalid UTF-8 sequences with + /// `U+FFFD REPLACEMENT CHARACTER`, which looks like this: � + /// + /// [`u8`]: ../../std/primitive.u8.html + /// [byteslice]: ../../std/primitive.slice.html + /// + /// If you are sure that the byte slice is valid UTF-8, and you don't want + /// to incur the overhead of the conversion, there is an unsafe version + /// of this function, [`from_utf8_unchecked`], which has the same behavior + /// but skips the checks. + /// + /// [`from_utf8_unchecked`]: struct.String.html#method.from_utf8_unchecked + /// + /// This function returns a [`Cow<'a, str>`]. If our byte slice is invalid + /// UTF-8, then we need to insert the replacement characters, which will + /// change the size of the string, and hence, require a `String`. But if + /// it's already valid UTF-8, we don't need a new allocation. This return + /// type allows us to handle both cases. + /// + /// [`Cow<'a, str>`]: ../../std/borrow/enum.Cow.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// let sparkle_heart = String::from_utf8_lossy(&sparkle_heart); + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + /// + /// Incorrect bytes: + /// + /// ``` + /// // some invalid bytes + /// let input = b"Hello \xF0\x90\x80World"; + /// let output = String::from_utf8_lossy(input); + /// + /// assert_eq!("Hello �World", output); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> { + let mut i; + match str::from_utf8(v) { + Ok(s) => return Cow::Borrowed(s), + Err(e) => i = e.valid_up_to(), + } + + const TAG_CONT_U8: u8 = 128; + const REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8 + let total = v.len(); + fn unsafe_get(xs: &[u8], i: usize) -> u8 { + unsafe { *xs.get_unchecked(i) } + } + fn safe_get(xs: &[u8], i: usize, total: usize) -> u8 { + if i >= total { 0 } else { unsafe_get(xs, i) } + } + + let mut res = String::with_capacity(total); + + if i > 0 { + unsafe { res.as_mut_vec().extend_from_slice(&v[..i]) }; + } + + // subseqidx is the index of the first byte of the subsequence we're + // looking at. It's used to copy a bunch of contiguous good codepoints + // at once instead of copying them one by one. + let mut subseqidx = i; + + while i < total { + let i_ = i; + let byte = unsafe_get(v, i); + i += 1; + + macro_rules! error { () => ({ + unsafe { + if subseqidx != i_ { + res.as_mut_vec().extend_from_slice(&v[subseqidx..i_]); + } + subseqidx = i; + res.as_mut_vec().extend_from_slice(REPLACEMENT); + } + })} + + if byte < 128 { + // subseqidx handles this + } else { + let w = core_str::utf8_char_width(byte); + + match w { + 2 => { + if safe_get(v, i, total) & 192 != TAG_CONT_U8 { + error!(); + continue; + } + i += 1; + } + 3 => { + match (byte, safe_get(v, i, total)) { + (0xE0, 0xA0...0xBF) => (), + (0xE1...0xEC, 0x80...0xBF) => (), + (0xED, 0x80...0x9F) => (), + (0xEE...0xEF, 0x80...0xBF) => (), + _ => { + error!(); + continue; + } + } + i += 1; + if safe_get(v, i, total) & 192 != TAG_CONT_U8 { + error!(); + continue; + } + i += 1; + } + 4 => { + match (byte, safe_get(v, i, total)) { + (0xF0, 0x90...0xBF) => (), + (0xF1...0xF3, 0x80...0xBF) => (), + (0xF4, 0x80...0x8F) => (), + _ => { + error!(); + continue; + } + } + i += 1; + if safe_get(v, i, total) & 192 != TAG_CONT_U8 { + error!(); + continue; + } + i += 1; + if safe_get(v, i, total) & 192 != TAG_CONT_U8 { + error!(); + continue; + } + i += 1; + } + _ => { + error!(); + continue; + } + } + } + } + if subseqidx < total { + unsafe { res.as_mut_vec().extend_from_slice(&v[subseqidx..total]) }; + } + Cow::Owned(res) + } + + /// Decode a UTF-16 encoded vector `v` into a `String`, returning `Err` + /// if `v` contains any invalid data. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // 𝄞music + /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0x0073, 0x0069, 0x0063]; + /// assert_eq!(String::from("𝄞music"), + /// String::from_utf16(v).unwrap()); + /// + /// // 𝄞mu<invalid>ic + /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0xD800, 0x0069, 0x0063]; + /// assert!(String::from_utf16(v).is_err()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf16(v: &[u16]) -> Result<String, FromUtf16Error> { + decode_utf16(v.iter().cloned()).collect::<Result<_, _>>().map_err(|_| FromUtf16Error(())) + } + + /// Decode a UTF-16 encoded vector `v` into a string, replacing + /// invalid data with the replacement character (U+FFFD). + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // 𝄞mus<invalid>ic<invalid> + /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0x0073, 0xDD1E, 0x0069, 0x0063, + /// 0xD834]; + /// + /// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"), + /// String::from_utf16_lossy(v)); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf16_lossy(v: &[u16]) -> String { + decode_utf16(v.iter().cloned()).map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)).collect() + } + + /// Creates a new `String` from a length, capacity, and pointer. + /// + /// # Safety + /// + /// This is highly unsafe, due to the number of invariants that aren't + /// checked: + /// + /// * The memory at `ptr` needs to have been previously allocated by the + /// same allocator the standard library uses. + /// * `length` needs to be less than or equal to `capacity`. + /// * `capacity` needs to be the correct value. + /// + /// Violating these may cause problems like corrupting the allocator's + /// internal datastructures. + /// + /// The ownership of `ptr` is effectively transferred to the + /// `String` which may then deallocate, reallocate or change the + /// contents of memory pointed to by the pointer at will. Ensure + /// that nothing else uses the pointer after calling this + /// function. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::mem; + /// + /// unsafe { + /// let s = String::from("hello"); + /// let ptr = s.as_ptr(); + /// let len = s.len(); + /// let capacity = s.capacity(); + /// + /// mem::forget(s); + /// + /// let s = String::from_raw_parts(ptr as *mut _, len, capacity); + /// + /// assert_eq!(String::from("hello"), s); + /// } + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> String { + String { vec: Vec::from_raw_parts(buf, length, capacity) } + } + + /// Converts a vector of bytes to a `String` without checking that the + /// string contains valid UTF-8. + /// + /// See the safe version, [`from_utf8`], for more details. + /// + /// [`from_utf8`]: struct.String.html#method.from_utf8 + /// + /// # Safety + /// + /// This function is unsafe because it does not check that the bytes passed + /// to it are valid UTF-8. If this constraint is violated, it may cause + /// memory unsafety issues with future users of the `String`, as the rest of + /// the standard library assumes that `String`s are valid UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// let sparkle_heart = unsafe { + /// String::from_utf8_unchecked(sparkle_heart) + /// }; + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn from_utf8_unchecked(bytes: Vec<u8>) -> String { + String { vec: bytes } + } + + /// Converts a `String` into a byte vector. + /// + /// This consumes the `String`, so we do not need to copy its contents. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("hello"); + /// let bytes = s.into_bytes(); + /// + /// assert_eq!(&[104, 101, 108, 108, 111][..], &bytes[..]); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_bytes(self) -> Vec<u8> { + self.vec + } + + /// Extracts a string slice containing the entire string. + #[inline] + #[stable(feature = "string_as_str", since = "1.7.0")] + pub fn as_str(&self) -> &str { + self + } + + /// Extracts a string slice containing the entire string. + #[inline] + #[stable(feature = "string_as_str", since = "1.7.0")] + pub fn as_mut_str(&mut self) -> &mut str { + self + } + + /// Appends a given string slice onto the end of this `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// s.push_str("bar"); + /// + /// assert_eq!("foobar", s); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn push_str(&mut self, string: &str) { + self.vec.extend_from_slice(string.as_bytes()) + } + + /// Returns this `String`'s capacity, in bytes. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::with_capacity(10); + /// + /// assert!(s.capacity() >= 10); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn capacity(&self) -> usize { + self.vec.capacity() + } + + /// Ensures that this `String`'s capacity is at least `additional` bytes + /// larger than its length. + /// + /// The capacity may be increased by more than `additional` bytes if it + /// chooses, to prevent frequent reallocations. + /// + /// If you do not want this "at least" behavior, see the [`reserve_exact`] + /// method. + /// + /// [`reserve_exact`]: #method.reserve_exact + /// + /// # Panics + /// + /// Panics if the new capacity overflows `usize`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::new(); + /// + /// s.reserve(10); + /// + /// assert!(s.capacity() >= 10); + /// ``` + /// + /// This may not actually increase the capacity: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// s.push('a'); + /// s.push('b'); + /// + /// // s now has a length of 2 and a capacity of 10 + /// assert_eq!(2, s.len()); + /// assert_eq!(10, s.capacity()); + /// + /// // Since we already have an extra 8 capacity, calling this... + /// s.reserve(8); + /// + /// // ... doesn't actually increase. + /// assert_eq!(10, s.capacity()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve(&mut self, additional: usize) { + self.vec.reserve(additional) + } + + /// Ensures that this `String`'s capacity is `additional` bytes + /// larger than its length. + /// + /// Consider using the [`reserve`] method unless you absolutely know + /// better than the allocator. + /// + /// [`reserve`]: #method.reserve + /// + /// # Panics + /// + /// Panics if the new capacity overflows `usize`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::new(); + /// + /// s.reserve_exact(10); + /// + /// assert!(s.capacity() >= 10); + /// ``` + /// + /// This may not actually increase the capacity: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// s.push('a'); + /// s.push('b'); + /// + /// // s now has a length of 2 and a capacity of 10 + /// assert_eq!(2, s.len()); + /// assert_eq!(10, s.capacity()); + /// + /// // Since we already have an extra 8 capacity, calling this... + /// s.reserve_exact(8); + /// + /// // ... doesn't actually increase. + /// assert_eq!(10, s.capacity()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve_exact(&mut self, additional: usize) { + self.vec.reserve_exact(additional) + } + + /// Shrinks the capacity of this `String` to match its length. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// s.reserve(100); + /// assert!(s.capacity() >= 100); + /// + /// s.shrink_to_fit(); + /// assert_eq!(3, s.capacity()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn shrink_to_fit(&mut self) { + self.vec.shrink_to_fit() + } + + /// Appends the given `char` to the end of this `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("abc"); + /// + /// s.push('1'); + /// s.push('2'); + /// s.push('3'); + /// + /// assert_eq!("abc123", s); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn push(&mut self, ch: char) { + match ch.len_utf8() { + 1 => self.vec.push(ch as u8), + _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()), + } + } + + /// Returns a byte slice of this `String`'s contents. + /// + /// The inverse of this method is [`from_utf8`]. + /// + /// [`from_utf8`]: #method.from_utf8 + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("hello"); + /// + /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn as_bytes(&self) -> &[u8] { + &self.vec + } + + /// Shortens this `String` to the specified length. + /// + /// If `new_len` is greater than the string's current length, this has no + /// effect. + /// + /// Note that this method has no effect on the allocated capacity + /// of the string + /// + /// # Panics + /// + /// Panics if `new_len` does not lie on a [`char`] boundary. + /// + /// [`char`]: ../../std/primitive.char.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("hello"); + /// + /// s.truncate(2); + /// + /// assert_eq!("he", s); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn truncate(&mut self, new_len: usize) { + if new_len <= self.len() { + assert!(self.is_char_boundary(new_len)); + self.vec.truncate(new_len) + } + } + + /// Removes the last character from the string buffer and returns it. + /// + /// Returns `None` if this `String` is empty. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// assert_eq!(s.pop(), Some('o')); + /// assert_eq!(s.pop(), Some('o')); + /// assert_eq!(s.pop(), Some('f')); + /// + /// assert_eq!(s.pop(), None); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn pop(&mut self) -> Option<char> { + let ch = match self.chars().rev().next() { + Some(ch) => ch, + None => return None, + }; + let newlen = self.len() - ch.len_utf8(); + unsafe { + self.vec.set_len(newlen); + } + Some(ch) + } + + /// Removes a `char` from this `String` at a byte position and returns it. + /// + /// This is an `O(n)` operation, as it requires copying every element in the + /// buffer. + /// + /// # Panics + /// + /// Panics if `idx` is larger than or equal to the `String`'s length, + /// or if it does not lie on a [`char`] boundary. + /// + /// [`char`]: ../../std/primitive.char.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// assert_eq!(s.remove(0), 'f'); + /// assert_eq!(s.remove(1), 'o'); + /// assert_eq!(s.remove(0), 'o'); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn remove(&mut self, idx: usize) -> char { + let ch = match self[idx..].chars().next() { + Some(ch) => ch, + None => panic!("cannot remove a char from the end of a string"), + }; + + let next = idx + ch.len_utf8(); + let len = self.len(); + unsafe { + ptr::copy(self.vec.as_ptr().offset(next as isize), + self.vec.as_mut_ptr().offset(idx as isize), + len - next); + self.vec.set_len(len - (next - idx)); + } + ch + } + + /// Inserts a character into this `String` at a byte position. + /// + /// This is an `O(n)` operation as it requires copying every element in the + /// buffer. + /// + /// # Panics + /// + /// Panics if `idx` is larger than the `String`'s length, or if it does not + /// lie on a [`char`] boundary. + /// + /// [`char`]: ../../std/primitive.char.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::with_capacity(3); + /// + /// s.insert(0, 'f'); + /// s.insert(1, 'o'); + /// s.insert(2, 'o'); + /// + /// assert_eq!("foo", s); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn insert(&mut self, idx: usize, ch: char) { + assert!(self.is_char_boundary(idx)); + let mut bits = [0; 4]; + let bits = ch.encode_utf8(&mut bits).as_bytes(); + + unsafe { + self.insert_bytes(idx, bits); + } + } + + unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) { + let len = self.len(); + let amt = bytes.len(); + self.vec.reserve(amt); + + ptr::copy(self.vec.as_ptr().offset(idx as isize), + self.vec.as_mut_ptr().offset((idx + amt) as isize), + len - idx); + ptr::copy(bytes.as_ptr(), + self.vec.as_mut_ptr().offset(idx as isize), + amt); + self.vec.set_len(len + amt); + } + + /// Inserts a string slice into this `String` at a byte position. + /// + /// This is an `O(n)` operation as it requires copying every element in the + /// buffer. + /// + /// # Panics + /// + /// Panics if `idx` is larger than the `String`'s length, or if it does not + /// lie on a [`char`] boundary. + /// + /// [`char`]: ../../std/primitive.char.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("bar"); + /// + /// s.insert_str(0, "foo"); + /// + /// assert_eq!("foobar", s); + /// ``` + #[inline] + #[stable(feature = "insert_str", since = "1.16.0")] + pub fn insert_str(&mut self, idx: usize, string: &str) { + assert!(self.is_char_boundary(idx)); + + unsafe { + self.insert_bytes(idx, string.as_bytes()); + } + } + + /// Returns a mutable reference to the contents of this `String`. + /// + /// # Safety + /// + /// This function is unsafe because it does not check that the bytes passed + /// to it are valid UTF-8. If this constraint is violated, it may cause + /// memory unsafety issues with future users of the `String`, as the rest of + /// the standard library assumes that `String`s are valid UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("hello"); + /// + /// unsafe { + /// let vec = s.as_mut_vec(); + /// assert_eq!(&[104, 101, 108, 108, 111][..], &vec[..]); + /// + /// vec.reverse(); + /// } + /// assert_eq!(s, "olleh"); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8> { + &mut self.vec + } + + /// Returns the length of this `String`, in bytes. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let a = String::from("foo"); + /// + /// assert_eq!(a.len(), 3); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn len(&self) -> usize { + self.vec.len() + } + + /// Returns `true` if this `String` has a length of zero. + /// + /// Returns `false` otherwise. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut v = String::new(); + /// assert!(v.is_empty()); + /// + /// v.push('a'); + /// assert!(!v.is_empty()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Splits the string into two at the given index. + /// + /// Returns a newly allocated `String`. `self` contains bytes `[0, at)`, and + /// the returned `String` contains bytes `[at, len)`. `at` must be on the + /// boundary of a UTF-8 code point. + /// + /// Note that the capacity of `self` does not change. + /// + /// # Panics + /// + /// Panics if `at` is not on a `UTF-8` code point boundary, or if it is beyond the last + /// code point of the string. + /// + /// # Examples + /// + /// ``` + /// # fn main() { + /// let mut hello = String::from("Hello, World!"); + /// let world = hello.split_off(7); + /// assert_eq!(hello, "Hello, "); + /// assert_eq!(world, "World!"); + /// # } + /// ``` + #[inline] + #[stable(feature = "string_split_off", since = "1.16.0")] + pub fn split_off(&mut self, at: usize) -> String { + assert!(self.is_char_boundary(at)); + let other = self.vec.split_off(at); + unsafe { String::from_utf8_unchecked(other) } + } + + /// Truncates this `String`, removing all contents. + /// + /// While this means the `String` will have a length of zero, it does not + /// touch its capacity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// s.clear(); + /// + /// assert!(s.is_empty()); + /// assert_eq!(0, s.len()); + /// assert_eq!(3, s.capacity()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn clear(&mut self) { + self.vec.clear() + } + + /// Creates a draining iterator that removes the specified range in the string + /// and yields the removed chars. + /// + /// Note: The element range is removed even if the iterator is not + /// consumed until the end. + /// + /// # Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. + /// + /// [`char`]: ../../std/primitive.char.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("α is alpha, β is beta"); + /// let beta_offset = s.find('β').unwrap_or(s.len()); + /// + /// // Remove the range up until the β from the string + /// let t: String = s.drain(..beta_offset).collect(); + /// assert_eq!(t, "α is alpha, "); + /// assert_eq!(s, "β is beta"); + /// + /// // A full range clears the string + /// s.drain(..); + /// assert_eq!(s, ""); + /// ``` + #[stable(feature = "drain", since = "1.6.0")] + pub fn drain<R>(&mut self, range: R) -> Drain + where R: RangeArgument<usize> + { + // Memory safety + // + // The String version of Drain does not have the memory safety issues + // of the vector version. The data is just plain bytes. + // Because the range removal happens in Drop, if the Drain iterator is leaked, + // the removal will not happen. + let len = self.len(); + let start = match range.start() { + Included(&n) => n, + Excluded(&n) => n + 1, + Unbounded => 0, + }; + let end = match range.end() { + Included(&n) => n + 1, + Excluded(&n) => n, + Unbounded => len, + }; + + // Take out two simultaneous borrows. The &mut String won't be accessed + // until iteration is over, in Drop. + let self_ptr = self as *mut _; + // slicing does the appropriate bounds checks + let chars_iter = self[start..end].chars(); + + Drain { + start: start, + end: end, + iter: chars_iter, + string: self_ptr, + } + } + + /// Creates a splicing iterator that removes the specified range in the string, + /// replaces with the given string, and yields the removed chars. + /// The given string doesn’t need to be the same length as the range. + /// + /// Note: The element range is removed when the `Splice` is dropped, + /// even if the iterator is not consumed until the end. + /// + /// # Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. + /// + /// [`char`]: ../../std/primitive.char.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// #![feature(splice)] + /// let mut s = String::from("α is alpha, β is beta"); + /// let beta_offset = s.find('β').unwrap_or(s.len()); + /// + /// // Replace the range up until the β from the string + /// let t: String = s.splice(..beta_offset, "Α is capital alpha; ").collect(); + /// assert_eq!(t, "α is alpha, "); + /// assert_eq!(s, "Α is capital alpha; β is beta"); + /// ``` + #[unstable(feature = "splice", reason = "recently added", issue = "32310")] + pub fn splice<'a, 'b, R>(&'a mut self, range: R, replace_with: &'b str) -> Splice<'a, 'b> + where R: RangeArgument<usize> + { + // Memory safety + // + // The String version of Splice does not have the memory safety issues + // of the vector version. The data is just plain bytes. + // Because the range removal happens in Drop, if the Splice iterator is leaked, + // the removal will not happen. + let len = self.len(); + let start = match range.start() { + Included(&n) => n, + Excluded(&n) => n + 1, + Unbounded => 0, + }; + let end = match range.end() { + Included(&n) => n + 1, + Excluded(&n) => n, + Unbounded => len, + }; + + // Take out two simultaneous borrows. The &mut String won't be accessed + // until iteration is over, in Drop. + let self_ptr = self as *mut _; + // slicing does the appropriate bounds checks + let chars_iter = self[start..end].chars(); + + Splice { + start: start, + end: end, + iter: chars_iter, + string: self_ptr, + replace_with: replace_with + } + } + + /// Converts this `String` into a `Box<str>`. + /// + /// This will drop any excess capacity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("hello"); + /// + /// let b = s.into_boxed_str(); + /// ``` + #[stable(feature = "box_str", since = "1.4.0")] + pub fn into_boxed_str(self) -> Box<str> { + let slice = self.vec.into_boxed_slice(); + unsafe { from_boxed_utf8_unchecked(slice) } + } +} + +impl FromUtf8Error { + /// Returns a slice of [`u8`]s bytes that were attempted to convert to a `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// #![feature(from_utf8_error_as_bytes)] + /// // some invalid bytes, in a vector + /// let bytes = vec![0, 159]; + /// + /// let value = String::from_utf8(bytes); + /// + /// assert_eq!(&[0, 159], value.unwrap_err().as_bytes()); + /// ``` + #[unstable(feature = "from_utf8_error_as_bytes", reason = "recently added", issue = "40895")] + pub fn as_bytes(&self) -> &[u8] { + &self.bytes[..] + } + + /// Returns the bytes that were attempted to convert to a `String`. + /// + /// This method is carefully constructed to avoid allocation. It will + /// consume the error, moving out the bytes, so that a copy of the bytes + /// does not need to be made. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some invalid bytes, in a vector + /// let bytes = vec![0, 159]; + /// + /// let value = String::from_utf8(bytes); + /// + /// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_bytes(self) -> Vec<u8> { + self.bytes + } + + /// Fetch a `Utf8Error` to get more details about the conversion failure. + /// + /// The [`Utf8Error`] type provided by [`std::str`] represents an error that may + /// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's + /// an analogue to `FromUtf8Error`. See its documentation for more details + /// on using it. + /// + /// [`Utf8Error`]: ../../std/str/struct.Utf8Error.html + /// [`std::str`]: ../../std/str/index.html + /// [`u8`]: ../../std/primitive.u8.html + /// [`&str`]: ../../std/primitive.str.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some invalid bytes, in a vector + /// let bytes = vec![0, 159]; + /// + /// let error = String::from_utf8(bytes).unwrap_err().utf8_error(); + /// + /// // the first byte is invalid here + /// assert_eq!(1, error.valid_up_to()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn utf8_error(&self) -> Utf8Error { + self.error + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for FromUtf8Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.error, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for FromUtf16Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt("invalid utf-16: lone surrogate found", f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for String { + fn clone(&self) -> Self { + String { vec: self.vec.clone() } + } + + fn clone_from(&mut self, source: &Self) { + self.vec.clone_from(&source.vec); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl FromIterator<char> for String { + fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[stable(feature = "string_from_iter_by_ref", since = "1.17.0")] +impl<'a> FromIterator<&'a char> for String { + fn from_iter<I: IntoIterator<Item = &'a char>>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> FromIterator<&'a str> for String { + fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[stable(feature = "extend_string", since = "1.4.0")] +impl FromIterator<String> for String { + fn from_iter<I: IntoIterator<Item = String>>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[stable(feature = "herd_cows", since = "1.19.0")] +impl<'a> FromIterator<Cow<'a, str>> for String { + fn from_iter<I: IntoIterator<Item = Cow<'a, str>>>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Extend<char> for String { + fn extend<I: IntoIterator<Item = char>>(&mut self, iter: I) { + let iterator = iter.into_iter(); + let (lower_bound, _) = iterator.size_hint(); + self.reserve(lower_bound); + for ch in iterator { + self.push(ch) + } + } +} + +#[stable(feature = "extend_ref", since = "1.2.0")] +impl<'a> Extend<&'a char> for String { + fn extend<I: IntoIterator<Item = &'a char>>(&mut self, iter: I) { + self.extend(iter.into_iter().cloned()); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> Extend<&'a str> for String { + fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iter: I) { + for s in iter { + self.push_str(s) + } + } +} + +#[stable(feature = "extend_string", since = "1.4.0")] +impl Extend<String> for String { + fn extend<I: IntoIterator<Item = String>>(&mut self, iter: I) { + for s in iter { + self.push_str(&s) + } + } +} + +#[stable(feature = "herd_cows", since = "1.19.0")] +impl<'a> Extend<Cow<'a, str>> for String { + fn extend<I: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: I) { + for s in iter { + self.push_str(&s) + } + } +} + +/// A convenience impl that delegates to the impl for `&str` +#[unstable(feature = "pattern", + reason = "API not fully fleshed out and ready to be stabilized", + issue = "27721")] +impl<'a, 'b> Pattern<'a> for &'b String { + type Searcher = <&'b str as Pattern<'a>>::Searcher; + + fn into_searcher(self, haystack: &'a str) -> <&'b str as Pattern<'a>>::Searcher { + self[..].into_searcher(haystack) + } + + #[inline] + fn is_contained_in(self, haystack: &'a str) -> bool { + self[..].is_contained_in(haystack) + } + + #[inline] + fn is_prefix_of(self, haystack: &'a str) -> bool { + self[..].is_prefix_of(haystack) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for String { + #[inline] + fn eq(&self, other: &String) -> bool { + PartialEq::eq(&self[..], &other[..]) + } + #[inline] + fn ne(&self, other: &String) -> bool { + PartialEq::ne(&self[..], &other[..]) + } +} + +macro_rules! impl_eq { + ($lhs:ty, $rhs: ty) => { + #[stable(feature = "rust1", since = "1.0.0")] + impl<'a, 'b> PartialEq<$rhs> for $lhs { + #[inline] + fn eq(&self, other: &$rhs) -> bool { PartialEq::eq(&self[..], &other[..]) } + #[inline] + fn ne(&self, other: &$rhs) -> bool { PartialEq::ne(&self[..], &other[..]) } + } + + #[stable(feature = "rust1", since = "1.0.0")] + impl<'a, 'b> PartialEq<$lhs> for $rhs { + #[inline] + fn eq(&self, other: &$lhs) -> bool { PartialEq::eq(&self[..], &other[..]) } + #[inline] + fn ne(&self, other: &$lhs) -> bool { PartialEq::ne(&self[..], &other[..]) } + } + + } +} + +impl_eq! { String, str } +impl_eq! { String, &'a str } +impl_eq! { Cow<'a, str>, str } +impl_eq! { Cow<'a, str>, &'b str } +impl_eq! { Cow<'a, str>, String } + +#[stable(feature = "rust1", since = "1.0.0")] +impl Default for String { + /// Creates an empty `String`. + #[inline] + fn default() -> String { + String::new() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for String { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for String { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl hash::Hash for String { + #[inline] + fn hash<H: hash::Hasher>(&self, hasher: &mut H) { + (**self).hash(hasher) + } +} + +/// Implements the `+` operator for concatenating two strings. +/// +/// This consumes the `String` on the left-hand side and re-uses its buffer (growing it if +/// necessary). This is done to avoid allocating a new `String` and copying the entire contents on +/// every operation, which would lead to `O(n^2)` running time when building an `n`-byte string by +/// repeated concatenation. +/// +/// The string on the right-hand side is only borrowed; its contents are copied into the returned +/// `String`. +/// +/// # Examples +/// +/// Concatenating two `String`s takes the first by value and borrows the second: +/// +/// ``` +/// let a = String::from("hello"); +/// let b = String::from(" world"); +/// let c = a + &b; +/// // `a` is moved and can no longer be used here. +/// ``` +/// +/// If you want to keep using the first `String`, you can clone it and append to the clone instead: +/// +/// ``` +/// let a = String::from("hello"); +/// let b = String::from(" world"); +/// let c = a.clone() + &b; +/// // `a` is still valid here. +/// ``` +/// +/// Concatenating `&str` slices can be done by converting the first to a `String`: +/// +/// ``` +/// let a = "hello"; +/// let b = " world"; +/// let c = a.to_string() + b; +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> Add<&'a str> for String { + type Output = String; + + #[inline] + fn add(mut self, other: &str) -> String { + self.push_str(other); + self + } +} + +/// Implements the `+=` operator for appending to a `String`. +/// +/// This has the same behavior as the [`push_str`] method. +/// +/// [`push_str`]: struct.String.html#method.push_str +#[stable(feature = "stringaddassign", since = "1.12.0")] +impl<'a> AddAssign<&'a str> for String { + #[inline] + fn add_assign(&mut self, other: &str) { + self.push_str(other); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index<ops::Range<usize>> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::Range<usize>) -> &str { + &self[..][index] + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index<ops::RangeTo<usize>> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::RangeTo<usize>) -> &str { + &self[..][index] + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index<ops::RangeFrom<usize>> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::RangeFrom<usize>) -> &str { + &self[..][index] + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index<ops::RangeFull> for String { + type Output = str; + + #[inline] + fn index(&self, _index: ops::RangeFull) -> &str { + unsafe { str::from_utf8_unchecked(&self.vec) } + } +} +#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")] +impl ops::Index<ops::RangeInclusive<usize>> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::RangeInclusive<usize>) -> &str { + Index::index(&**self, index) + } +} +#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")] +impl ops::Index<ops::RangeToInclusive<usize>> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::RangeToInclusive<usize>) -> &str { + Index::index(&**self, index) + } +} + +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::IndexMut<ops::Range<usize>> for String { + #[inline] + fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str { + &mut self[..][index] + } +} +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::IndexMut<ops::RangeTo<usize>> for String { + #[inline] + fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str { + &mut self[..][index] + } +} +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::IndexMut<ops::RangeFrom<usize>> for String { + #[inline] + fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str { + &mut self[..][index] + } +} +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::IndexMut<ops::RangeFull> for String { + #[inline] + fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str { + unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) } + } +} +#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")] +impl ops::IndexMut<ops::RangeInclusive<usize>> for String { + #[inline] + fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str { + IndexMut::index_mut(&mut **self, index) + } +} +#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")] +impl ops::IndexMut<ops::RangeToInclusive<usize>> for String { + #[inline] + fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str { + IndexMut::index_mut(&mut **self, index) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Deref for String { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + unsafe { str::from_utf8_unchecked(&self.vec) } + } +} + +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::DerefMut for String { + #[inline] + fn deref_mut(&mut self) -> &mut str { + unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) } + } +} + +/// An error when parsing a `String`. +/// +/// This `enum` is slightly awkward: it will never actually exist. This error is +/// part of the type signature of the implementation of [`FromStr`] on +/// [`String`]. The return type of [`from_str`], requires that an error be +/// defined, but, given that a [`String`] can always be made into a new +/// [`String`] without error, this type will never actually be returned. As +/// such, it is only here to satisfy said signature, and is useless otherwise. +/// +/// [`FromStr`]: ../../std/str/trait.FromStr.html +/// [`String`]: struct.String.html +/// [`from_str`]: ../../std/str/trait.FromStr.html#tymethod.from_str +#[stable(feature = "str_parse_error", since = "1.5.0")] +#[derive(Copy)] +pub enum ParseError {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl FromStr for String { + type Err = ParseError; + #[inline] + fn from_str(s: &str) -> Result<String, ParseError> { + Ok(String::from(s)) + } +} + +#[stable(feature = "str_parse_error", since = "1.5.0")] +impl Clone for ParseError { + fn clone(&self) -> ParseError { + match *self {} + } +} + +#[stable(feature = "str_parse_error", since = "1.5.0")] +impl fmt::Debug for ParseError { + fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result { + match *self {} + } +} + +#[stable(feature = "str_parse_error2", since = "1.8.0")] +impl fmt::Display for ParseError { + fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result { + match *self {} + } +} + +#[stable(feature = "str_parse_error", since = "1.5.0")] +impl PartialEq for ParseError { + fn eq(&self, _: &ParseError) -> bool { + match *self {} + } +} + +#[stable(feature = "str_parse_error", since = "1.5.0")] +impl Eq for ParseError {} + +/// A trait for converting a value to a `String`. +/// +/// This trait is automatically implemented for any type which implements the +/// [`Display`] trait. As such, `ToString` shouldn't be implemented directly: +/// [`Display`] should be implemented instead, and you get the `ToString` +/// implementation for free. +/// +/// [`Display`]: ../../std/fmt/trait.Display.html +#[stable(feature = "rust1", since = "1.0.0")] +pub trait ToString { + /// Converts the given value to a `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let i = 5; + /// let five = String::from("5"); + /// + /// assert_eq!(five, i.to_string()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + fn to_string(&self) -> String; +} + +/// # Panics +/// +/// In this implementation, the `to_string` method panics +/// if the `Display` implementation returns an error. +/// This indicates an incorrect `Display` implementation +/// since `fmt::Write for String` never returns an error itself. +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: fmt::Display + ?Sized> ToString for T { + #[inline] + default fn to_string(&self) -> String { + use core::fmt::Write; + let mut buf = String::new(); + buf.write_fmt(format_args!("{}", self)) + .expect("a Display implementation return an error unexpectedly"); + buf.shrink_to_fit(); + buf + } +} + +#[stable(feature = "str_to_string_specialization", since = "1.9.0")] +impl ToString for str { + #[inline] + fn to_string(&self) -> String { + String::from(self) + } +} + +#[stable(feature = "cow_str_to_string_specialization", since = "1.17.0")] +impl<'a> ToString for Cow<'a, str> { + #[inline] + fn to_string(&self) -> String { + self[..].to_owned() + } +} + +#[stable(feature = "string_to_string_specialization", since = "1.17.0")] +impl ToString for String { + #[inline] + fn to_string(&self) -> String { + self.to_owned() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef<str> for String { + #[inline] + fn as_ref(&self) -> &str { + self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef<[u8]> for String { + #[inline] + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> From<&'a str> for String { + fn from(s: &'a str) -> String { + s.to_owned() + } +} + +// note: test pulls in libstd, which causes errors here +#[cfg(not(test))] +#[stable(feature = "string_from_box", since = "1.18.0")] +impl From<Box<str>> for String { + fn from(s: Box<str>) -> String { + s.into_string() + } +} + +#[stable(feature = "box_from_str", since = "1.18.0")] +impl Into<Box<str>> for String { + fn into(self) -> Box<str> { + self.into_boxed_str() + } +} + +#[stable(feature = "string_from_cow_str", since = "1.14.0")] +impl<'a> From<Cow<'a, str>> for String { + fn from(s: Cow<'a, str>) -> String { + s.into_owned() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> From<&'a str> for Cow<'a, str> { + #[inline] + fn from(s: &'a str) -> Cow<'a, str> { + Cow::Borrowed(s) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> From<String> for Cow<'a, str> { + #[inline] + fn from(s: String) -> Cow<'a, str> { + Cow::Owned(s) + } +} + +#[stable(feature = "cow_str_from_iter", since = "1.12.0")] +impl<'a> FromIterator<char> for Cow<'a, str> { + fn from_iter<I: IntoIterator<Item = char>>(it: I) -> Cow<'a, str> { + Cow::Owned(FromIterator::from_iter(it)) + } +} + +#[stable(feature = "cow_str_from_iter", since = "1.12.0")] +impl<'a, 'b> FromIterator<&'b str> for Cow<'a, str> { + fn from_iter<I: IntoIterator<Item = &'b str>>(it: I) -> Cow<'a, str> { + Cow::Owned(FromIterator::from_iter(it)) + } +} + +#[stable(feature = "cow_str_from_iter", since = "1.12.0")] +impl<'a> FromIterator<String> for Cow<'a, str> { + fn from_iter<I: IntoIterator<Item = String>>(it: I) -> Cow<'a, str> { + Cow::Owned(FromIterator::from_iter(it)) + } +} + +#[stable(feature = "from_string_for_vec_u8", since = "1.14.0")] +impl From<String> for Vec<u8> { + fn from(string: String) -> Vec<u8> { + string.into_bytes() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Write for String { + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + self.push_str(s); + Ok(()) + } + + #[inline] + fn write_char(&mut self, c: char) -> fmt::Result { + self.push(c); + Ok(()) + } +} + +/// A draining iterator for `String`. +/// +/// This struct is created by the [`drain`] method on [`String`]. See its +/// documentation for more. +/// +/// [`drain`]: struct.String.html#method.drain +/// [`String`]: struct.String.html +#[stable(feature = "drain", since = "1.6.0")] +pub struct Drain<'a> { + /// Will be used as &'a mut String in the destructor + string: *mut String, + /// Start of part to remove + start: usize, + /// End of part to remove + end: usize, + /// Current remaining range to remove + iter: Chars<'a>, +} + +#[stable(feature = "collection_debug", since = "1.17.0")] +impl<'a> fmt::Debug for Drain<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.pad("Drain { .. }") + } +} + +#[stable(feature = "drain", since = "1.6.0")] +unsafe impl<'a> Sync for Drain<'a> {} +#[stable(feature = "drain", since = "1.6.0")] +unsafe impl<'a> Send for Drain<'a> {} + +#[stable(feature = "drain", since = "1.6.0")] +impl<'a> Drop for Drain<'a> { + fn drop(&mut self) { + unsafe { + // Use Vec::drain. "Reaffirm" the bounds checks to avoid + // panic code being inserted again. + let self_vec = (*self.string).as_mut_vec(); + if self.start <= self.end && self.end <= self_vec.len() { + self_vec.drain(self.start..self.end); + } + } + } +} + +#[stable(feature = "drain", since = "1.6.0")] +impl<'a> Iterator for Drain<'a> { + type Item = char; + + #[inline] + fn next(&mut self) -> Option<char> { + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.iter.size_hint() + } +} + +#[stable(feature = "drain", since = "1.6.0")] +impl<'a> DoubleEndedIterator for Drain<'a> { + #[inline] + fn next_back(&mut self) -> Option<char> { + self.iter.next_back() + } +} + +#[unstable(feature = "fused", issue = "35602")] +impl<'a> FusedIterator for Drain<'a> {} + +/// A splicing iterator for `String`. +/// +/// This struct is created by the [`splice()`] method on [`String`]. See its +/// documentation for more. +/// +/// [`splice()`]: struct.String.html#method.splice +/// [`String`]: struct.String.html +#[derive(Debug)] +#[unstable(feature = "splice", reason = "recently added", issue = "32310")] +pub struct Splice<'a, 'b> { + /// Will be used as &'a mut String in the destructor + string: *mut String, + /// Start of part to remove + start: usize, + /// End of part to remove + end: usize, + /// Current remaining range to remove + iter: Chars<'a>, + replace_with: &'b str, +} + +#[unstable(feature = "splice", reason = "recently added", issue = "32310")] +unsafe impl<'a, 'b> Sync for Splice<'a, 'b> {} +#[unstable(feature = "splice", reason = "recently added", issue = "32310")] +unsafe impl<'a, 'b> Send for Splice<'a, 'b> {} + +#[unstable(feature = "splice", reason = "recently added", issue = "32310")] +impl<'a, 'b> Drop for Splice<'a, 'b> { + fn drop(&mut self) { + unsafe { + let vec = (*self.string).as_mut_vec(); + vec.splice(self.start..self.end, self.replace_with.bytes()); + } + } +} + +#[unstable(feature = "splice", reason = "recently added", issue = "32310")] +impl<'a, 'b> Iterator for Splice<'a, 'b> { + type Item = char; + + #[inline] + fn next(&mut self) -> Option<char> { + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.iter.size_hint() + } +} + +#[unstable(feature = "splice", reason = "recently added", issue = "32310")] +impl<'a, 'b> DoubleEndedIterator for Splice<'a, 'b> { + #[inline] + fn next_back(&mut self) -> Option<char> { + self.iter.next_back() + } +} |
