diff options
| author | bors <bors@rust-lang.org> | 2014-07-30 10:31:11 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2014-07-30 10:31:11 +0000 |
| commit | 692077b6431460b96beb0ccf4f38299618d51db2 (patch) | |
| tree | be2f2c3376af9a7c40bcdf3b206e7b89d0c7cec1 | |
| parent | 774d5eb0b07bdb4d3c853159fbcd265de60e7755 (diff) | |
| parent | 235bb3fb1f40dcd7da9b4f8996f137589505de8e (diff) | |
| download | rust-692077b6431460b96beb0ccf4f38299618d51db2.tar.gz rust-692077b6431460b96beb0ccf4f38299618d51db2.zip | |
auto merge of #15777 : SimonSapin/rust/pub-ascii-maps, r=alexcrichton
When dealing with HTTP request or responses, many tokens are case-insensitive in the ASCII range but the bytes from the network are not necessarily valid UTF-8. **[breaking-change]** Rather than adding new very similar traits, this re-uses the `std::ascii::OwnedStrAsciiExt` and `std::ascii::StrAsciiExt` traits, but rename to remove `Str` since that does not apply for bytes. This PR also makes `std::ascii::ASCII_UPPER_MAP` and `std::ascii::ASCII_LOWER_MAP`, the lookup table all these methods are based on, public. In case there is something else related to ASCII case we haven’t thought of yet, that can be implemented outside of libstd without duplicating the tables. Although this is a breaking change, I thought this could do without an RFC since the relevant traits are not in the prelude. r? @alexcrichton
| -rw-r--r-- | src/liblog/directive.rs | 2 | ||||
| -rw-r--r-- | src/librustc/lint/mod.rs | 2 | ||||
| -rw-r--r-- | src/libstd/ascii.rs | 153 | ||||
| -rw-r--r-- | src/test/run-pass/issue-10683.rs | 2 |
4 files changed, 98 insertions, 61 deletions
diff --git a/src/liblog/directive.rs b/src/liblog/directive.rs index 2062bb4c7b1..a8def3e289d 100644 --- a/src/liblog/directive.rs +++ b/src/liblog/directive.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::ascii::StrAsciiExt; +use std::ascii::AsciiExt; use std::cmp; #[deriving(Show, Clone)] diff --git a/src/librustc/lint/mod.rs b/src/librustc/lint/mod.rs index 8896a068baa..9224647bc79 100644 --- a/src/librustc/lint/mod.rs +++ b/src/librustc/lint/mod.rs @@ -31,7 +31,7 @@ #![macro_escape] use std::hash; -use std::ascii::StrAsciiExt; +use std::ascii::AsciiExt; use syntax::codemap::Span; use syntax::visit::FnKind; use syntax::ast; diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index bcc0761d92a..7731cd12ec2 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -20,11 +20,19 @@ use iter::Iterator; use mem; use option::{Option, Some, None}; use slice::{ImmutableVector, MutableVector, Vector}; -use str::{Str, StrAllocating, StrSlice}; +use str::{Str, StrSlice}; +use str; use string::String; use to_string::IntoStr; use vec::Vec; +#[deprecated="this trait has been renamed to `AsciiExt`"] +pub use StrAsciiExt = self::AsciiExt; + +#[deprecated="this trait has been renamed to `OwnedAsciiExt`"] +pub use OwnedStrAsciiExt = self::OwnedAsciiExt; + + /// Datatype to hold one ascii character. It wraps a `u8`, with the highest bit always zero. #[deriving(Clone, PartialEq, PartialOrd, Ord, Eq, Hash)] pub struct Ascii { chr: u8 } @@ -366,108 +374,133 @@ impl IntoBytes for Vec<Ascii> { } } + /// Extension methods for ASCII-subset only operations on owned strings -pub trait OwnedStrAsciiExt { +pub trait OwnedAsciiExt { /// Convert the string to ASCII upper case: /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', /// but non-ASCII letters are unchanged. - fn into_ascii_upper(self) -> String; + fn into_ascii_upper(self) -> Self; /// Convert the string to ASCII lower case: /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', /// but non-ASCII letters are unchanged. - fn into_ascii_lower(self) -> String; + fn into_ascii_lower(self) -> Self; } /// Extension methods for ASCII-subset only operations on string slices -pub trait StrAsciiExt { +pub trait AsciiExt<T> { /// Makes a copy of the string in ASCII upper case: /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', /// but non-ASCII letters are unchanged. - fn to_ascii_upper(&self) -> String; + fn to_ascii_upper(&self) -> T; /// Makes a copy of the string in ASCII lower case: /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', /// but non-ASCII letters are unchanged. - fn to_ascii_lower(&self) -> String; + fn to_ascii_lower(&self) -> T; /// Check that two strings are an ASCII case-insensitive match. /// Same as `to_ascii_lower(a) == to_ascii_lower(b)`, /// but without allocating and copying temporary strings. - fn eq_ignore_ascii_case(&self, other: &str) -> bool; + fn eq_ignore_ascii_case(&self, other: Self) -> bool; } -impl<'a> StrAsciiExt for &'a str { +impl<'a> AsciiExt<String> for &'a str { #[inline] fn to_ascii_upper(&self) -> String { - unsafe { str_copy_map_bytes(*self, ASCII_UPPER_MAP) } + // Vec<u8>::to_ascii_upper() preserves the UTF-8 invariant. + unsafe { str::raw::from_utf8_owned(self.as_bytes().to_ascii_upper()) } } #[inline] fn to_ascii_lower(&self) -> String { - unsafe { str_copy_map_bytes(*self, ASCII_LOWER_MAP) } + // Vec<u8>::to_ascii_lower() preserves the UTF-8 invariant. + unsafe { str::raw::from_utf8_owned(self.as_bytes().to_ascii_lower()) } } #[inline] fn eq_ignore_ascii_case(&self, other: &str) -> bool { - self.len() == other.len() && - self.as_bytes().iter().zip(other.as_bytes().iter()).all( - |(byte_self, byte_other)| { - ASCII_LOWER_MAP[*byte_self as uint] == - ASCII_LOWER_MAP[*byte_other as uint] - }) + self.as_bytes().eq_ignore_ascii_case(other.as_bytes()) } } -impl OwnedStrAsciiExt for String { +impl OwnedAsciiExt for String { #[inline] fn into_ascii_upper(self) -> String { - unsafe { str_map_bytes(self, ASCII_UPPER_MAP) } + // Vec<u8>::into_ascii_upper() preserves the UTF-8 invariant. + unsafe { str::raw::from_utf8_owned(self.into_bytes().into_ascii_upper()) } } #[inline] fn into_ascii_lower(self) -> String { - unsafe { str_map_bytes(self, ASCII_LOWER_MAP) } + // Vec<u8>::into_ascii_lower() preserves the UTF-8 invariant. + unsafe { str::raw::from_utf8_owned(self.into_bytes().into_ascii_lower()) } } } -#[inline] -unsafe fn str_map_bytes(string: String, map: &'static [u8]) -> String { - let mut bytes = string.into_bytes(); +impl<'a> AsciiExt<Vec<u8>> for &'a [u8] { + #[inline] + fn to_ascii_upper(&self) -> Vec<u8> { + self.iter().map(|&byte| ASCII_UPPER_MAP[byte as uint]).collect() + } - for b in bytes.mut_iter() { - *b = map[*b as uint]; + #[inline] + fn to_ascii_lower(&self) -> Vec<u8> { + self.iter().map(|&byte| ASCII_LOWER_MAP[byte as uint]).collect() } - String::from_utf8(bytes).unwrap() + #[inline] + fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool { + self.len() == other.len() && + self.iter().zip(other.iter()).all( + |(byte_self, byte_other)| { + ASCII_LOWER_MAP[*byte_self as uint] == + ASCII_LOWER_MAP[*byte_other as uint] + }) + } } -#[inline] -unsafe fn str_copy_map_bytes(string: &str, map: &'static [u8]) -> String { - let mut s = String::from_str(string); - for b in s.as_mut_bytes().mut_iter() { - *b = map[*b as uint]; +impl OwnedAsciiExt for Vec<u8> { + #[inline] + fn into_ascii_upper(mut self) -> Vec<u8> { + for byte in self.mut_iter() { + *byte = ASCII_UPPER_MAP[*byte as uint]; + } + self + } + + #[inline] + fn into_ascii_lower(mut self) -> Vec<u8> { + for byte in self.mut_iter() { + *byte = ASCII_LOWER_MAP[*byte as uint]; + } + self } - s.into_string() } -static ASCII_LOWER_MAP: &'static [u8] = &[ + +pub static ASCII_LOWER_MAP: [u8, ..256] = [ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, - 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'', + b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', + b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', + b'8', b'9', b':', b';', b'<', b'=', b'>', b'?', + b'@', + + b'a', b'b', b'c', b'd', b'e', b'f', b'g', + b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', + b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', + b'x', b'y', b'z', + + b'[', b'\\', b']', b'^', b'_', + b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g', + b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', + b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', + b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, @@ -486,23 +519,27 @@ static ASCII_LOWER_MAP: &'static [u8] = &[ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, ]; -static ASCII_UPPER_MAP: &'static [u8] = &[ +pub static ASCII_UPPER_MAP: [u8, ..256] = [ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, - 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, - 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, - 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'', + b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', + b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', + b'8', b'9', b':', b';', b'<', b'=', b'>', b'?', + b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G', + b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', + b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', + b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_', + b'`', + + b'A', b'B', b'C', b'D', b'E', b'F', b'G', + b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', + b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', + b'X', b'Y', b'Z', + + b'{', b'|', b'}', b'~', 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, diff --git a/src/test/run-pass/issue-10683.rs b/src/test/run-pass/issue-10683.rs index 934adc07e2f..df4342bfeb5 100644 --- a/src/test/run-pass/issue-10683.rs +++ b/src/test/run-pass/issue-10683.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::ascii::StrAsciiExt; +use std::ascii::AsciiExt; static NAME: &'static str = "hello world"; |
