diff options
| author | Alex Crichton <alex@alexcrichton.com> | 2013-08-10 00:28:47 -0700 |
|---|---|---|
| committer | Alex Crichton <alex@alexcrichton.com> | 2013-08-12 23:18:51 -0700 |
| commit | b820748ff5a2bc09b58dd7ac511e0f607d55f2e9 (patch) | |
| tree | 7a34e364ae23968eadba2e7bc0ecae8e20b3fced /src/libstd | |
| parent | 44675ac6aff91889f960655b0034964740415e8c (diff) | |
| download | rust-b820748ff5a2bc09b58dd7ac511e0f607d55f2e9.tar.gz rust-b820748ff5a2bc09b58dd7ac511e0f607d55f2e9.zip | |
Implement formatting arguments for strings and integers
Closes #1651
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/char.rs | 38 | ||||
| -rw-r--r-- | src/libstd/fmt/mod.rs | 167 | ||||
| -rw-r--r-- | src/libstd/str.rs | 53 | ||||
| -rw-r--r-- | src/libstd/unstable/raw.rs | 1 |
4 files changed, 196 insertions, 63 deletions
diff --git a/src/libstd/char.rs b/src/libstd/char.rs index 9c55e22b1f8..a0635f6f12e 100644 --- a/src/libstd/char.rs +++ b/src/libstd/char.rs @@ -20,6 +20,15 @@ use unicode::{derived_property, general_category}; #[cfg(not(test))] use cmp::{Eq, Ord}; #[cfg(not(test))] use num::Zero; +// UTF-8 ranges and tags for encoding characters +static TAG_CONT: uint = 128u; +static MAX_ONE_B: uint = 128u; +static TAG_TWO_B: uint = 192u; +static MAX_TWO_B: uint = 2048u; +static TAG_THREE_B: uint = 224u; +static MAX_THREE_B: uint = 65536u; +static TAG_FOUR_B: uint = 240u; + /* Lu Uppercase_Letter an uppercase letter Ll Lowercase_Letter a lowercase letter @@ -278,6 +287,12 @@ pub trait Char { fn escape_unicode(&self, f: &fn(char)); fn escape_default(&self, f: &fn(char)); fn len_utf8_bytes(&self) -> uint; + + /// Encodes this character as utf-8 into the provided byte-buffer. The + /// buffer must be at least 4 bytes long or a runtime failure will occur. + /// + /// This will then return the number of characters written to the slice. + fn encode_utf8(&self, dst: &mut [u8]) -> uint; } impl Char for char { @@ -308,6 +323,29 @@ impl Char for char { fn escape_default(&self, f: &fn(char)) { escape_default(*self, f) } fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) } + + fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint { + let code = *self as uint; + if code < MAX_ONE_B { + dst[0] = code as u8; + return 1; + } else if code < MAX_TWO_B { + dst[0] = (code >> 6u & 31u | TAG_TWO_B) as u8; + dst[1] = (code & 63u | TAG_CONT) as u8; + return 2; + } else if code < MAX_THREE_B { + dst[0] = (code >> 12u & 15u | TAG_THREE_B) as u8; + dst[1] = (code >> 6u & 63u | TAG_CONT) as u8; + dst[2] = (code & 63u | TAG_CONT) as u8; + return 3; + } else { + dst[0] = (code >> 18u & 7u | TAG_FOUR_B) as u8; + dst[1] = (code >> 12u & 63u | TAG_CONT) as u8; + dst[2] = (code >> 6u & 63u | TAG_CONT) as u8; + dst[3] = (code & 63u | TAG_CONT) as u8; + return 4; + } + } } #[cfg(not(test))] diff --git a/src/libstd/fmt/mod.rs b/src/libstd/fmt/mod.rs index 2b8807b2291..a25620cfa69 100644 --- a/src/libstd/fmt/mod.rs +++ b/src/libstd/fmt/mod.rs @@ -11,7 +11,7 @@ use prelude::*; use cast; -use int; +use char::Char; use rt::io::Decorator; use rt::io::mem::MemWriter; use rt::io; @@ -122,6 +122,11 @@ pub unsafe fn sprintf(fmt: &[rt::Piece], args: &[Argument]) -> ~str { } impl<'self> Formatter<'self> { + + // First up is the collection of functions used to execute a format string + // at runtime. This consumes all of the compile-time statics generated by + // the ifmt! syntax extension. + fn run(&mut self, piece: &rt::Piece, cur: Option<&str>) { let setcount = |slot: &mut Option<uint>, cnt: &parse::Count| { match *cnt { @@ -240,6 +245,118 @@ impl<'self> Formatter<'self> { } } } + + // Helper methods used for padding and processing formatting arguments that + // all formatting traits can use. + + /// TODO: dox + pub fn pad_integral(&mut self, s: &[u8], alternate_prefix: &str, + positive: bool) { + use fmt::parse::{FlagAlternate, FlagSignPlus}; + + let mut actual_len = s.len(); + if self.flags & 1 << (FlagAlternate as uint) != 0 { + actual_len += alternate_prefix.len(); + } + if self.flags & 1 << (FlagSignPlus as uint) != 0 { + actual_len += 1; + } + if !positive { + actual_len += 1; + } + + let emit = |this: &mut Formatter| { + if this.flags & 1 << (FlagSignPlus as uint) != 0 && positive { + this.buf.write(['+' as u8]); + } else if !positive { + this.buf.write(['-' as u8]); + } + if this.flags & 1 << (FlagAlternate as uint) != 0 { + this.buf.write(alternate_prefix.as_bytes()); + } + this.buf.write(s); + }; + + match self.width { + None => { emit(self) } + Some(min) if actual_len >= min => { emit(self) } + Some(min) => { + do self.with_padding(min - actual_len) |me| { + emit(me); + } + } + } + } + + /// This function takes a string slice and emits it to the internal buffer + /// after applying the relevant formatting flags specified. The flags + /// recognized for generic strings are: + /// + /// * width - the minimum width of what to emit + /// * fill/alignleft - what to emit and where to emit it if the string + /// provided needs to be padded + /// * precision - the maximum length to emit, the string is truncated if it + /// is longer than this length + /// + /// Notably this function ignored the `flag` parameters + pub fn pad(&mut self, s: &str) { + // Make sure there's a fast path up front + if self.width.is_none() && self.precision.is_none() { + self.buf.write(s.as_bytes()); + return + } + // The `precision` field can be interpreted as a `max-width` for the + // string being formatted + match self.precision { + Some(max) => { + // If there's a maximum width and our string is longer than + // that, then we must always have truncation. This is the only + // case where the maximum length will matter. + let char_len = s.char_len(); + if char_len >= max { + let nchars = uint::min(max, char_len); + self.buf.write(s.slice_chars(0, nchars).as_bytes()); + return + } + } + None => {} + } + + // The `width` field is more of a `min-width` parameter at this point. + match self.width { + // If we're under the maximum length, and there's no minimum length + // requirements, then we can just emit the string + None => { self.buf.write(s.as_bytes()) } + + // If we're under the maximum width, check if we're over the minimum + // width, if so it's as easy as just emitting the string. + Some(width) if s.char_len() >= width => { + self.buf.write(s.as_bytes()) + } + + // If we're under both the maximum and the minimum width, then fill + // up the minimum width with the specified string + some alignment. + Some(width) => { + do self.with_padding(width - s.len()) |me| { + me.buf.write(s.as_bytes()); + } + } + } + } + + fn with_padding(&mut self, padding: uint, f: &fn(&mut Formatter)) { + if self.alignleft { + f(self); + } + let mut fill = [0u8, ..4]; + let len = self.fill.encode_utf8(fill); + for _ in range(0, padding) { + self.buf.write(fill.slice_to(len)); + } + if !self.alignleft { + f(self); + } + } } /// This is a function which calls are emitted to by the compiler itself to @@ -279,60 +396,53 @@ impl Bool for bool { impl<'self> String for &'self str { fn fmt(s: & &'self str, f: &mut Formatter) { - // XXX: formatting args - f.buf.write(s.as_bytes()) + f.pad(*s); } } impl Char for char { fn fmt(c: &char, f: &mut Formatter) { - // XXX: formatting args - // XXX: shouldn't require an allocation - let mut s = ~""; - s.push_char(*c); - f.buf.write(s.as_bytes()); + let mut utf8 = [0u8, ..4]; + let amt = c.encode_utf8(utf8); + let s: &str = unsafe { cast::transmute(utf8.slice_to(amt)) }; + String::fmt(&s, f); } } impl Signed for int { fn fmt(c: &int, f: &mut Formatter) { - // XXX: formatting args - do int::to_str_bytes(*c, 10) |buf| { - f.buf.write(buf); + do uint::to_str_bytes(c.abs() as uint, 10) |buf| { + f.pad_integral(buf, "", *c >= 0); } } } impl Unsigned for uint { fn fmt(c: &uint, f: &mut Formatter) { - // XXX: formatting args do uint::to_str_bytes(*c, 10) |buf| { - f.buf.write(buf); + f.pad_integral(buf, "", true); } } } impl Octal for uint { fn fmt(c: &uint, f: &mut Formatter) { - // XXX: formatting args do uint::to_str_bytes(*c, 8) |buf| { - f.buf.write(buf); + f.pad_integral(buf, "0o", true); } } } impl LowerHex for uint { fn fmt(c: &uint, f: &mut Formatter) { - // XXX: formatting args do uint::to_str_bytes(*c, 16) |buf| { - f.buf.write(buf); + f.pad_integral(buf, "0x", true); } } } impl UpperHex for uint { fn fmt(c: &uint, f: &mut Formatter) { - // XXX: formatting args do uint::to_str_bytes(*c, 16) |buf| { let mut local = [0u8, ..16]; for (l, &b) in local.mut_iter().zip(buf.iter()) { @@ -341,16 +451,29 @@ impl UpperHex for uint { _ => b, }; } - f.buf.write(local.slice_to(buf.len())); + f.pad_integral(local.slice_to(buf.len()), "0x", true); } } } impl<T> Poly for T { fn fmt(t: &T, f: &mut Formatter) { - // XXX: formatting args - let s = sys::log_str(t); - f.buf.write(s.as_bytes()); + match (f.width, f.precision) { + (None, None) => { + // XXX: sys::log_str should have a variant which takes a stream + // and we should directly call that (avoids unnecessary + // allocations) + let s = sys::log_str(t); + f.buf.write(s.as_bytes()); + } + + // If we have a specified width for formatting, then we have to make + // this allocation of a new string + _ => { + let s = sys::log_str(t); + f.pad(s); + } + } } } diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 9e5f2db4092..10df45922b5 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -33,6 +33,7 @@ use ptr; use ptr::RawPtr; use to_str::ToStr; use uint; +use unstable::raw::{Repr, Slice}; use vec; use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector}; @@ -758,15 +759,7 @@ macro_rules! utf8_acc_cont_byte( ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as uint) ) -// UTF-8 tags and ranges static TAG_CONT_U8: u8 = 128u8; -static TAG_CONT: uint = 128u; -static MAX_ONE_B: uint = 128u; -static TAG_TWO_B: uint = 192u; -static MAX_TWO_B: uint = 2048u; -static TAG_THREE_B: uint = 224u; -static MAX_THREE_B: uint = 65536u; -static TAG_FOUR_B: uint = 240u; static MAX_UNICODE: uint = 1114112u; /// Unsafe operations @@ -1988,40 +1981,18 @@ impl OwnedStr for ~str { #[inline] fn push_char(&mut self, c: char) { assert!((c as uint) < MAX_UNICODE); // FIXME: #7609: should be enforced on all `char` + let cur_len = self.len(); + self.reserve_at_least(cur_len + 4); // may use up to 4 bytes + + // Attempt to not use an intermediate buffer by just pushing bytes + // directly onto this string. unsafe { - let code = c as uint; - let nb = if code < MAX_ONE_B { 1u } - else if code < MAX_TWO_B { 2u } - else if code < MAX_THREE_B { 3u } - else { 4u }; - let len = self.len(); - let new_len = len + nb; - self.reserve_at_least(new_len); - let off = len as int; - do self.as_mut_buf |buf, _len| { - match nb { - 1u => { - *ptr::mut_offset(buf, off) = code as u8; - } - 2u => { - *ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8; - *ptr::mut_offset(buf, off + 1) = (code & 63u | TAG_CONT) as u8; - } - 3u => { - *ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8; - *ptr::mut_offset(buf, off + 1) = (code >> 6u & 63u | TAG_CONT) as u8; - *ptr::mut_offset(buf, off + 2) = (code & 63u | TAG_CONT) as u8; - } - 4u => { - *ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8; - *ptr::mut_offset(buf, off + 1) = (code >> 12u & 63u | TAG_CONT) as u8; - *ptr::mut_offset(buf, off + 2) = (code >> 6u & 63u | TAG_CONT) as u8; - *ptr::mut_offset(buf, off + 3) = (code & 63u | TAG_CONT) as u8; - } - _ => {} - } - } - raw::set_len(self, new_len); + let v = self.repr(); + let len = c.encode_utf8(cast::transmute(Slice { + data: ((&(*v).data) as *u8).offset(cur_len as int), + len: 4, + })); + raw::set_len(self, cur_len + len); } } diff --git a/src/libstd/unstable/raw.rs b/src/libstd/unstable/raw.rs index 0e074b53d6b..bdf84604fb3 100644 --- a/src/libstd/unstable/raw.rs +++ b/src/libstd/unstable/raw.rs @@ -56,6 +56,7 @@ impl<'self, T> Repr<Slice<T>> for &'self [T] {} impl<'self> Repr<Slice<u8>> for &'self str {} impl<T> Repr<*Box<T>> for @T {} impl<T> Repr<*Box<Vec<T>>> for @[T] {} +impl Repr<*String> for ~str {} // sure would be nice to have this // impl<T> Repr<*Vec<T>> for ~[T] {} |
