diff options
| author | Huon Wilson <dbau.pp+github@gmail.com> | 2014-12-30 13:58:31 +1100 |
|---|---|---|
| committer | Huon Wilson <dbau.pp+github@gmail.com> | 2015-01-05 12:30:51 +1100 |
| commit | 0302d379776fca82d3eb693046239dc66998f691 (patch) | |
| tree | ee410f469e5a025f09d1eca774ca3aa71f9d519e /src/libunicode | |
| parent | 19120209d8e532514203d16a2cff0ad3b44de3bb (diff) | |
| download | rust-0302d379776fca82d3eb693046239dc66998f691.tar.gz rust-0302d379776fca82d3eb693046239dc66998f691.zip | |
Merge `UnicodeChar` and `CharExt`.
This "reexports" all the functionality of `core::char::CharExt` as methods on `unicode::u_char::UnicodeChar` (which is renamed to `CharExt`). Imports may need to be updated: one now imports just `unicode::CharExt` or `std::char::CharExt`, rather than two traits from either. This is a [breaking-change].
Diffstat (limited to 'src/libunicode')
| -rw-r--r-- | src/libunicode/lib.rs | 10 | ||||
| -rw-r--r-- | src/libunicode/tables.rs | 2 | ||||
| -rw-r--r-- | src/libunicode/u_char.rs | 107 | ||||
| -rw-r--r-- | src/libunicode/u_str.rs | 6 |
4 files changed, 112 insertions, 13 deletions
diff --git a/src/libunicode/lib.rs b/src/libunicode/lib.rs index 170700fb4d5..a3884d0c86e 100644 --- a/src/libunicode/lib.rs +++ b/src/libunicode/lib.rs @@ -44,9 +44,9 @@ mod u_str; // re-export char so that std et al see it correctly /// Character manipulation (`char` type, Unicode Scalar Value) /// -/// This module provides the `Char` and `UnicodeChar` traits, as well as their -/// implementation for the primitive `char` type, in order to allow basic character -/// manipulation. +/// This module provides the `CharExt` trait, as well as its +/// implementation for the primitive `char` type, in order to allow +/// basic character manipulation. /// /// A `char` actually represents a /// *[Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)*, @@ -58,14 +58,14 @@ mod u_str; /// however the converse is not always true due to the above range limits /// and, as such, should be performed via the `from_u32` function.. pub mod char { - pub use core::char::{MAX, from_u32, from_digit, CharExt}; + pub use core::char::{MAX, from_u32, from_digit}; pub use normalize::{decompose_canonical, decompose_compatible, compose}; pub use tables::normalization::canonical_combining_class; pub use tables::UNICODE_VERSION; - pub use u_char::UnicodeChar; + pub use u_char::CharExt; } pub mod str { diff --git a/src/libunicode/tables.rs b/src/libunicode/tables.rs index e3550810010..c755ea93184 100644 --- a/src/libunicode/tables.rs +++ b/src/libunicode/tables.rs @@ -13,7 +13,7 @@ #![allow(missing_docs, non_upper_case_globals, non_snake_case)] /// The version of [Unicode](http://www.unicode.org/) -/// that the `UnicodeChar` and `UnicodeStrPrelude` traits are based on. +/// that the unicode parts of `CharExt` and `UnicodeStrPrelude` traits are based on. 
pub const UNICODE_VERSION: (uint, uint, uint) = (7, 0, 0); fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool { diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index 9c356801604..c1abfd4e189 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -8,17 +8,99 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! Unicode-intensive `char` methods. +//! Unicode-intensive `char` methods along with the `core` methods. //! //! These methods implement functionality for `char` that requires knowledge of //! Unicode definitions, including normalization, categorization, and display information. +use core::char; +use core::char::CharExt as C; use core::option::Option; use tables::{derived_property, property, general_category, conversions, charwidth}; -/// Useful functions for Unicode characters. +/// Functionality for manipulating `char`. #[experimental = "pending prelude organization"] -pub trait UnicodeChar { +pub trait CharExt { + /// Checks if a `char` parses as a numeric digit in the given radix. + /// + /// Compared to `is_numeric()`, this function only recognizes the characters + /// `0-9`, `a-z` and `A-Z`. + /// + /// # Return value + /// + /// Returns `true` if `c` is a valid digit under `radix`, and `false` + /// otherwise. + /// + /// # Panics + /// + /// Panics if given a radix > 36. + #[unstable = "pending integer conventions"] + fn is_digit(self, radix: uint) -> bool; + + /// Converts a character to the corresponding digit. + /// + /// # Return value + /// + /// If `c` is between '0' and '9', the corresponding value between 0 and + /// 9. If `c` is 'a' or 'A', 10. If `c` is 'b' or 'B', 11, etc. Returns + /// none if the character does not refer to a digit in the given radix. + /// + /// # Panics + /// + /// Panics if given a radix outside the range [0..36]. 
+ #[unstable = "pending integer conventions"] + fn to_digit(self, radix: uint) -> Option<uint>; + + /// Returns an iterator that yields the hexadecimal Unicode escape + /// of a character, as `char`s. + /// + /// All characters are escaped with Rust syntax of the form `\\u{NNNN}` + /// where `NNNN` is the shortest hexadecimal representation of the code + /// point. + #[stable] + fn escape_unicode(self) -> char::EscapeUnicode; + + /// Returns an iterator that yields the 'default' ASCII and + /// C++11-like literal escape of a character, as `char`s. + /// + /// The default is chosen with a bias toward producing literals that are + /// legal in a variety of languages, including C++11 and similar C-family + /// languages. The exact rules are: + /// + /// * Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively. + /// * Single-quote, double-quote and backslash chars are backslash- + /// escaped. + /// * Any other chars in the range [0x20,0x7e] are not escaped. + /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. + #[stable] + fn escape_default(self) -> char::EscapeDefault; + + /// Returns the amount of bytes this character would need if encoded in + /// UTF-8. + #[stable] + fn len_utf8(self) -> uint; + + /// Returns the amount of bytes this character would need if encoded in + /// UTF-16. + #[stable] + fn len_utf16(self) -> uint; + + /// Encodes this character as UTF-8 into the provided byte buffer, + /// and then returns the number of bytes written. + /// + /// If the buffer is not large enough, nothing will be written into it + /// and a `None` will be returned. + #[unstable = "pending decision about Iterator/Writer/Reader"] + fn encode_utf8(self, dst: &mut [u8]) -> Option<uint>; + + /// Encodes this character as UTF-16 into the provided `u16` buffer, + /// and then returns the number of `u16`s written. + /// + /// If the buffer is not large enough, nothing will be written into it + /// and a `None` will be returned. 
+ #[unstable = "pending decision about Iterator/Writer/Reader"] + fn encode_utf16(self, dst: &mut [u16]) -> Option<uint>; + /// Returns whether the specified character is considered a Unicode /// alphabetic code point. fn is_alphabetic(self) -> bool; @@ -118,7 +200,24 @@ pub trait UnicodeChar { } #[experimental = "pending prelude organization"] -impl UnicodeChar for char { +impl CharExt for char { + #[unstable = "pending integer conventions"] + fn is_digit(self, radix: uint) -> bool { C::is_digit(self, radix) } + #[unstable = "pending integer conventions"] + fn to_digit(self, radix: uint) -> Option<uint> { C::to_digit(self, radix) } + #[stable] + fn escape_unicode(self) -> char::EscapeUnicode { C::escape_unicode(self) } + #[stable] + fn escape_default(self) -> char::EscapeDefault { C::escape_default(self) } + #[stable] + fn len_utf8(self) -> uint { C::len_utf8(self) } + #[stable] + fn len_utf16(self) -> uint { C::len_utf16(self) } + #[unstable = "pending decision about Iterator/Writer/Reader"] + fn encode_utf8(self, dst: &mut [u8]) -> Option<uint> { C::encode_utf8(self, dst) } + #[unstable = "pending decision about Iterator/Writer/Reader"] + fn encode_utf16(self, dst: &mut [u16]) -> Option<uint> { C::encode_utf16(self, dst) } + fn is_alphabetic(self) -> bool { match self { 'a' ... 'z' | 'A' ... 'Z' => true, diff --git a/src/libunicode/u_str.rs b/src/libunicode/u_str.rs index 1b0c4171134..90949437774 100644 --- a/src/libunicode/u_str.rs +++ b/src/libunicode/u_str.rs @@ -13,7 +13,7 @@ //! Unicode-intensive string manipulations. //! //! This module provides functionality to `str` that requires the Unicode methods provided by the -//! UnicodeChar trait. +//! unicode parts of the CharExt trait. 
use self::GraphemeState::*; use core::prelude::*; @@ -26,7 +26,7 @@ use core::num::Int; use core::slice; use core::str::Split; -use u_char::UnicodeChar; +use u_char::CharExt as UCharExt; // conflicts with core::prelude::CharExt use tables::grapheme::GraphemeCat; /// An iterator over the words of a string, separated by a sequence of whitespace @@ -529,7 +529,7 @@ impl<I> Iterator for Utf16Encoder<I> where I: Iterator<Item=char> { let mut buf = [0u16; 2]; self.chars.next().map(|ch| { - let n = ch.encode_utf16(buf.as_mut_slice()).unwrap_or(0); + let n = CharExt::encode_utf16(ch, buf.as_mut_slice()).unwrap_or(0); if n == 2 { self.extra = buf[1]; } buf[0] }) |
