diff options
| -rw-r--r-- | src/libcollections/lib.rs | 1 | ||||
| -rw-r--r-- | src/libcollections/string.rs | 4 | ||||
| -rw-r--r-- | src/libcore/str/mod.rs | 7 | ||||
| -rw-r--r-- | src/libstd/io/mod.rs | 2 | ||||
| -rw-r--r-- | src/libstd_unicode/lib.rs | 2 | ||||
| -rw-r--r-- | src/libstd_unicode/u_str.rs | 26 |
6 files changed, 12 insertions, 30 deletions
diff --git a/src/libcollections/lib.rs b/src/libcollections/lib.rs index 53d5466e12b..f88bdd0ecf3 100644 --- a/src/libcollections/lib.rs +++ b/src/libcollections/lib.rs @@ -54,6 +54,7 @@ #![feature(slice_patterns)] #![feature(specialization)] #![feature(staged_api)] +#![feature(str_internals)] #![feature(trusted_len)] #![feature(unicode)] #![feature(unique)] diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 4b37aef860d..c3edba669f0 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -62,9 +62,9 @@ use core::iter::{FromIterator, FusedIterator}; use core::mem; use core::ops::{self, Add, AddAssign, Index, IndexMut}; use core::ptr; +use core::str as core_str; use core::str::pattern::Pattern; use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER}; -use std_unicode::str as unicode_str; use borrow::{Cow, ToOwned}; use range::RangeArgument; @@ -575,7 +575,7 @@ impl String { if byte < 128 { // subseqidx handles this } else { - let w = unicode_str::utf8_char_width(byte); + let w = core_str::utf8_char_width(byte); match w { 2 => { diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 925cd84154a..52e33016310 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -1352,6 +1352,13 @@ static UTF8_CHAR_WIDTH: [u8; 256] = [ 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF ]; +/// Given a first byte, determine how many bytes are in this UTF-8 character +#[unstable(feature = "str_internals", issue = "0")] +#[inline] +pub fn utf8_char_width(b: u8) -> usize { + return UTF8_CHAR_WIDTH[b as usize] as usize; +} + /// Mask of the value bits of a continuation byte const CONT_MASK: u8 = 0b0011_1111; /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 8cb7b2bda75..58788cdcd4c 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -256,7 +256,7 @@ #![stable(feature = "rust1", since = "1.0.0")] use cmp; -use std_unicode::str as core_str; +use core::str as core_str; use error as std_error; use fmt; use result; diff --git a/src/libstd_unicode/lib.rs b/src/libstd_unicode/lib.rs index d52d1549b51..1adf00e40f1 100644 --- a/src/libstd_unicode/lib.rs +++ b/src/libstd_unicode/lib.rs @@ -47,7 +47,7 @@ pub mod char; #[allow(deprecated)] pub mod str { pub use u_str::{SplitWhitespace, UnicodeStr}; - pub use u_str::{is_utf16, utf8_char_width}; + pub use u_str::is_utf16; pub use u_str::Utf16Encoder; } diff --git a/src/libstd_unicode/u_str.rs b/src/libstd_unicode/u_str.rs index 1c7894794c9..0ca6db9b0de 100644 --- a/src/libstd_unicode/u_str.rs +++ b/src/libstd_unicode/u_str.rs @@ -77,32 +77,6 @@ impl UnicodeStr for str { } } -// https://tools.ietf.org/html/rfc3629 -static UTF8_CHAR_WIDTH: [u8; 256] = [ -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF -0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF -4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF -]; - -/// Given a first byte, determine how many bytes are in this UTF-8 character -#[inline] -pub fn utf8_char_width(b: u8) -> usize { - return UTF8_CHAR_WIDTH[b as usize] as usize; -} - /// Determines if a vector of `u16` contains valid UTF-16 pub fn is_utf16(v: &[u16]) -> bool { let mut it = v.iter(); |
