diff options
| author | bors <bors@rust-lang.org> | 2013-08-13 19:23:21 -0700 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2013-08-13 19:23:21 -0700 |
| commit | 7585b34d3171eb79057dbf8196312b08a5bc328b (patch) | |
| tree | 3531f8443816138646d28cd0651e09d9190a8886 /src/libstd | |
| parent | 433fbe8fcf13724445cc57c85cc454352c969429 (diff) | |
| parent | 36882b3d54043efc9f78459da39471fb8d5e6239 (diff) | |
| download | rust-7585b34d3171eb79057dbf8196312b08a5bc328b.tar.gz rust-7585b34d3171eb79057dbf8196312b08a5bc328b.zip | |
auto merge of #8446 : alexcrichton/rust/ifmt++, r=graydon
This includes a number of improvements to `ifmt!`
* Implements formatting arguments -- `{:0.5x}` works now
* Formatting now works on all integer widths, not just `int` and `uint`
* Added a large doc block to `std::fmt` which should help explain what `ifmt!` is all about
* Added floating point formatters, although they have the same pitfalls from before (they're just proof-of-concept now)
Closed a couple of issues along the way, yay! Once this gets into a snapshot, I'll start looking into removing all of `fmt`
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/char.rs | 38 | ||||
| -rw-r--r-- | src/libstd/fmt/mod.rs | 607 | ||||
| -rw-r--r-- | src/libstd/fmt/parse.rs | 38 | ||||
| -rw-r--r-- | src/libstd/fmt/rt.rs | 2 | ||||
| -rw-r--r-- | src/libstd/str.rs | 53 | ||||
| -rw-r--r-- | src/libstd/unstable/raw.rs | 1 |
6 files changed, 622 insertions, 117 deletions
diff --git a/src/libstd/char.rs b/src/libstd/char.rs index 9c55e22b1f8..a0635f6f12e 100644 --- a/src/libstd/char.rs +++ b/src/libstd/char.rs @@ -20,6 +20,15 @@ use unicode::{derived_property, general_category}; #[cfg(not(test))] use cmp::{Eq, Ord}; #[cfg(not(test))] use num::Zero; +// UTF-8 ranges and tags for encoding characters +static TAG_CONT: uint = 128u; +static MAX_ONE_B: uint = 128u; +static TAG_TWO_B: uint = 192u; +static MAX_TWO_B: uint = 2048u; +static TAG_THREE_B: uint = 224u; +static MAX_THREE_B: uint = 65536u; +static TAG_FOUR_B: uint = 240u; + /* Lu Uppercase_Letter an uppercase letter Ll Lowercase_Letter a lowercase letter @@ -278,6 +287,12 @@ pub trait Char { fn escape_unicode(&self, f: &fn(char)); fn escape_default(&self, f: &fn(char)); fn len_utf8_bytes(&self) -> uint; + + /// Encodes this character as utf-8 into the provided byte-buffer. The + /// buffer must be at least 4 bytes long or a runtime failure will occur. + /// + /// This will then return the number of characters written to the slice. 
+ fn encode_utf8(&self, dst: &mut [u8]) -> uint; } impl Char for char { @@ -308,6 +323,29 @@ impl Char for char { fn escape_default(&self, f: &fn(char)) { escape_default(*self, f) } fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) } + + fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint { + let code = *self as uint; + if code < MAX_ONE_B { + dst[0] = code as u8; + return 1; + } else if code < MAX_TWO_B { + dst[0] = (code >> 6u & 31u | TAG_TWO_B) as u8; + dst[1] = (code & 63u | TAG_CONT) as u8; + return 2; + } else if code < MAX_THREE_B { + dst[0] = (code >> 12u & 15u | TAG_THREE_B) as u8; + dst[1] = (code >> 6u & 63u | TAG_CONT) as u8; + dst[2] = (code & 63u | TAG_CONT) as u8; + return 3; + } else { + dst[0] = (code >> 18u & 7u | TAG_FOUR_B) as u8; + dst[1] = (code >> 12u & 63u | TAG_CONT) as u8; + dst[2] = (code >> 6u & 63u | TAG_CONT) as u8; + dst[3] = (code & 63u | TAG_CONT) as u8; + return 4; + } + } } #[cfg(not(test))] diff --git a/src/libstd/fmt/mod.rs b/src/libstd/fmt/mod.rs index 2b8807b2291..41e588934b7 100644 --- a/src/libstd/fmt/mod.rs +++ b/src/libstd/fmt/mod.rs @@ -8,16 +8,317 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +/**! + +# The Formatting Module + +This module contains the runtime support for the `ifmt!` syntax extension. This +macro is implemented in the compiler to emit calls to this module in order to +format arguments at runtime into strings and streams. + +The functions contained in this module should not normally be used in everyday +use cases of `ifmt!`. The assumptions made by these functions are unsafe for all +inputs, and the compiler performs a large amount of validation on the arguments +to `ifmt!` in order to ensure safety at runtime. While it is possible to call +these functions directly, it is not recommended to do so in the general case. 
+ +## Usage + +The `ifmt!` macro is intended to be familiar to those coming from C's +printf/sprintf functions or Python's `str.format` function. In its current +revision, the `ifmt!` macro returns a `~str` type which is the result of the +formatting. In the future it will also be able to pass in a stream to format +arguments directly while performing minimal allocations. + +Some examples of the `ifmt!` extension are: + +~~~{.rust} +ifmt!("Hello") // => ~"Hello" +ifmt!("Hello, {:s}!", "world") // => ~"Hello, world!" +ifmt!("The number is {:d}", 1) // => ~"The number is 1" +ifmt!("{}", ~[3, 4]) // => ~"~[3, 4]" +ifmt!("{value}", value=4) // => ~"4" +ifmt!("{} {}", 1, 2) // => ~"1 2" +~~~ + +From these, you can see that the first argument is a format string. It is +required by the compiler for this to be a string literal; it cannot be a +variable passed in (in order to perform validity checking). The compiler will +then parse the format string and determine if the list of arguments provided is +suitable to pass to this format string. + +### Positional parameters + +Each formatting argument is allowed to specify which value argument it's +referencing, and if omitted it is assumed to be "the next argument". For +example, the format string `{} {} {}` would take three parameters, and they +would be formatted in the same order as they're given. The format string +`{2} {1} {0}`, however, would format arguments in reverse order. + +A format string is required to use all of its arguments, otherwise it is a +compile-time error. You may refer to the same argument more than once in the +format string, although it must always be referred to with the same type. + +### Named parameters + +Rust itself does not have a Python-like equivalent of named parameters to a +function, but the `ifmt!` macro is a syntax extension which allows it to +leverage named parameters. 
Named parameters are listed at the end of the +argument list and have the syntax: + +~~~ +identifier '=' expression +~~~ + +It is illegal to put positional parameters (those without names) after arguments +which have names. Like positional parameters, it is illegal to provide named +parameters that are unused by the format string. + +### Argument types + +Each argument's type is dictated by the format string. It is a requirement that +every argument is only ever referred to by one type. When specifying the format +of an argument, however, a string like `{}` indicates no type. This is allowed, +and if all references to one argument do not provide a type, then the format `?` +is used (the type's rust-representation is printed). For example, this is an +invalid format string: + +~~~ +{0:d} {0:s} +~~~ + +Because the first argument is both referred to as an integer as well as a +string. + +Because formatting is done via traits, there is no requirement that the +`d` format actually takes an `int`, but rather it simply requires a type which +ascribes to the `Signed` formatting trait. There are various parameters which do +require a particular type, however. Namely if the syntax `{:.*s}` is used, then +the number of characters to print from the string precedes the actual string and +must have the type `uint`. Although a `uint` can be printed with `{:u}`, it is +illegal to reference an argument as such. For example, this is another invalid +format string: + +~~~ +{:.*s} {0:u} +~~~ + +### Formatting traits + +When requesting that an argument be formatted with a particular type, you are +actually requesting that an argument ascribes to a particular trait. This allows +multiple actual types to be formatted via `{:d}` (like `i8` as well as `int`). 
+The current mapping of types to traits is: + +* `?` => Poly +* `d` => Signed +* `i` => Signed +* `u` => Unsigned +* `b` => Bool +* `c` => Char +* `o` => Octal +* `x` => LowerHex +* `X` => UpperHex +* `s` => String +* `p` => Pointer +* `t` => Binary +* `f` => Float + +What this means is that any type of argument which implements the +`std::fmt::Binary` trait can then be formatted with `{:t}`. Implementations are +provided for these traits for a number of primitive types by the standard +library as well. Again, the default formatting type (if no other is specified) +is `?` which is defined for all types by default. + +When implementing a format trait for your own type, you will have to implement a +method of the signature: + +~~~ +fn fmt(value: &T, f: &mut std::fmt::Formatter); +~~~ + +Your type will be passed by-reference in `value`, and then the function should +emit output into the `f.buf` stream. It is up to each format trait +implementation to correctly adhere to the requested formatting parameters. The +values of these parameters will be listed in the fields of the `Formatter` +struct. In order to help with this, the `Formatter` struct also provides some +helper methods. + +## Internationalization + +The formatting syntax supported by the `ifmt!` extension supports +internationalization by providing "methods" which execute various different +outputs depending on the input. The syntax and methods provided are similar to +other internationalization systems, so again nothing should seem alien. +Currently two methods are supported by this extension: "select" and "plural". + +Each method will execute one of a number of clauses, and then the value of the +clause will become what's the result of the argument's format. Inside of the +cases, nested argument strings may be provided, but all formatting arguments +must not be done through implicit positional means. 
All arguments inside of each +case of a method must be explicitly selected by their name or their integer +position. + +Furthermore, whenever a case is running, the special character `#` can be used +to reference the string value of the argument which was selected upon. As an +example: + +~~~ +ifmt!("{0, select, other{#}}", "hello") // => ~"hello" +~~~ + +This example is the equivalent of `{0:s}` essentially. + +### Select + +The select method is a switch over a `&str` parameter, and the parameter *must* +be of the type `&str`. An example of the syntax is: + +~~~ +{0, select, male{...} female{...} other{...}} +~~~ + +Breaking this down, the `0`-th argument is selected upon with the `select` +method, and then a number of cases follow. Each case is preceded by an +identifier which is the match-clause to execute the given arm. In this case, +there are two explicit cases, `male` and `female`. The case will be executed if +the string argument provided is an exact match to the case selected. + +The `other` case is also a required case for all `select` methods. This arm will +be executed if none of the other arms matched the word being selected over. + +### Plural + +The plural method is a switch statement over a `uint` parameter, and the +parameter *must* be a `uint`. A plural method in its full glory can be specified +as: + +~~~ +{0, plural, offset=1 =1{...} two{...} many{...} other{...}} +~~~ + +To break this down, the first `0` indicates that this method is selecting over +the value of the first positional parameter to the format string. Next, the +`plural` method is being executed. An optionally-supplied `offset` is then given +which indicates a number to subtract from argument `0` when matching. This is +then followed by a list of cases. + +Each case is allowed to supply a specific value to match upon with the syntax +`=N`. This case is executed if the value at argument `0` matches N exactly, +without taking the offset into account. 
A case may also be specified by one of +five keywords: `zero`, `one`, `two`, `few`, and `many`. These cases are matched +on after argument `0` has the offset taken into account. Currently the +definitions of `many` and `few` are hardcoded, but they are in theory defined by +the current locale. + +Finally, all `plural` methods must have an `other` case supplied which will be +executed if none of the other cases match. + +## Syntax + +The syntax for the formatting language used is drawn from other languages, so it +should not be too alien. Arguments are formatted with python-like syntax, +meaning that arguments are surrounded by `{}` instead of the C-like `%`. The +actual grammar for the formatting syntax is: + +~~~ +format_string := <text> [ format <text> ] * +format := '{' [ argument ] [ ':' format_spec ] [ ',' function_spec ] '}' +argument := integer | identifier + +format_spec := [[fill]align][sign]['#'][0][width]['.' precision][type] +fill := character +align := '<' | '>' +sign := '+' | '-' +width := count +precision := count | '*' +type := identifier | '' +count := parameter | integer +parameter := integer '$' + +function_spec := plural | select +select := 'select' ',' ( identifier arm ) * +plural := 'plural' ',' [ 'offset:' integer ] ( selector arm ) * +selector := '=' integer | keyword +keyword := 'zero' | 'one' | 'two' | 'few' | 'many' | 'other' +arm := '{' format_string '}' +~~~ + +## Formatting Parameters + +Each argument being formatted can be transformed by a number of formatting +parameters (corresponding to `format_spec` in the syntax above). These +parameters affect the string representation of what's being formatted. This +syntax draws heavily from Python's, so it may seem a bit familiar. + +### Fill/Alignment + +The fill character is provided normally in conjunction with the `width` +parameter. This indicates that if the value being formatted is smaller than +`width` some extra characters will be printed around it. 
The extra characters +are specified by `fill`, and the alignment can be one of two options: + +* `<` - the argument is left-aligned in `width` columns +* `>` - the argument is right-aligned in `width` columns + +### Sign/#/0 + +These can all be interpreted as flags for a particular formatter. + +* '+' - This is intended for numeric types and indicates that the sign should + always be printed. Positive signs are never printed by default, and the + negative sign is only printed by default for the `Signed` trait. This + flag indicates that the correct sign (+ or -) should always be printed. +* '-' - Currently not used +* '#' - This flag indicates that the "alternate" form of printing should be + used. By default, this only applies to the integer formatting traits and + performs like: + * `x` - precedes the argument with a "0x" + * `X` - precedes the argument with a "0x" + * `t` - precedes the argument with a "0b" + * `o` - precedes the argument with a "0o" +* '0' - This is used to indicate for integer formats that the padding should + both be done with a `0` character as well as be sign-aware. A format + like `{:08d}` would yield `00000001` for the integer `1`, while the same + format would yield `-0000001` for the integer `-1`. Notice that the + negative version has one fewer zero than the positive version. + +### Width + +This is a parameter for the "minimum width" that the format should take up. If +the value's string does not fill up this many characters, then the padding +specified by fill/alignment will be used to take up the required space. + +The default fill/alignment for non-numerics is a space and left-aligned. The +default for numeric formatters is also a space but with right-alignment. If the +'0' flag is specified for numerics, then the implicit fill character is '0'. + +The value for the width can also be provided as a `uint` in the list of +parameters by using the `2$` syntax indicating that the second argument is a +`uint` specifying the width. 
+ +### Precision + +For non-numeric types, this can be considered a "maximum width". If the +resulting string is longer than this width, then it is truncated down to this +many characters and only those are emitted. + +For integral types, this has no meaning currently. + +For floating-point types, this indicates how many digits after the decimal point +should be printed. + +*/ + use prelude::*; use cast; -use int; +use char::Char; use rt::io::Decorator; use rt::io::mem::MemWriter; use rt::io; use str; use sys; -use uint; use util; use vec; @@ -33,7 +334,7 @@ pub struct Formatter<'self> { /// Character used as 'fill' whenever there is alignment fill: char, /// Boolean indication of whether the output should be left-aligned - alignleft: bool, + align: parse::Alignment, /// Optionally specified integer width that the output should be width: Option<uint>, /// Optionally specified precision for numeric types @@ -77,6 +378,8 @@ pub trait String { fn fmt(&Self, &mut Formatter); } pub trait Poly { fn fmt(&Self, &mut Formatter); } #[allow(missing_doc)] pub trait Pointer { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Float { fn fmt(&Self, &mut Formatter); } /// The sprintf function takes a precompiled format string and a list of /// arguments, to return the resulting formatted string. @@ -109,7 +412,7 @@ pub unsafe fn sprintf(fmt: &[rt::Piece], args: &[Argument]) -> ~str { precision: None, // FIXME(#8248): shouldn't need a transmute buf: cast::transmute(&output as &io::Writer), - alignleft: false, + align: parse::AlignUnknown, fill: ' ', args: args, curarg: args.iter(), @@ -122,6 +425,11 @@ pub unsafe fn sprintf(fmt: &[rt::Piece], args: &[Argument]) -> ~str { } impl<'self> Formatter<'self> { + + // First up is the collection of functions used to execute a format string + // at runtime. This consumes all of the compile-time statics generated by + // the ifmt! syntax extension. 
+ fn run(&mut self, piece: &rt::Piece, cur: Option<&str>) { let setcount = |slot: &mut Option<uint>, cnt: &parse::Count| { match *cnt { @@ -144,7 +452,7 @@ impl<'self> Formatter<'self> { rt::Argument(ref arg) => { // Fill in the format parameters into the formatter self.fill = arg.format.fill; - self.alignleft = arg.format.alignleft; + self.align = arg.format.align; self.flags = arg.format.flags; setcount(&mut self.width, &arg.format.width); setcount(&mut self.precision, &arg.format.precision); @@ -233,13 +541,154 @@ impl<'self> Formatter<'self> { } fn runplural(&mut self, value: uint, pieces: &[rt::Piece]) { - do uint::to_str_bytes(value, 10) |buf| { + do ::uint::to_str_bytes(value, 10) |buf| { let valuestr = str::from_bytes_slice(buf); for piece in pieces.iter() { self.run(piece, Some(valuestr)); } } } + + // Helper methods used for padding and processing formatting arguments that + // all formatting traits can use. + + /// Performs the correct padding for an integer which has already been + /// emitted into a byte-array. The byte-array should *not* contain the sign + /// for the integer, that will be added by this method. + /// + /// # Arguments + /// + /// * s - the byte array that the number has been formatted into + /// * alternate_prefix - if the '#' character (FlagAlternate) is + /// provided, this is the prefix to put in front of the number. + /// Currently this is 0x/0o/0b/etc. + /// * positive - whether the original integer was positive or not. + /// + /// This function will correctly account for the flags provided as well as + /// the minimum width. It will not take precision into account. 
+ pub fn pad_integral(&mut self, s: &[u8], alternate_prefix: &str, + positive: bool) { + use fmt::parse::{FlagAlternate, FlagSignPlus, FlagSignAwareZeroPad}; + + let mut actual_len = s.len(); + if self.flags & 1 << (FlagAlternate as uint) != 0 { + actual_len += alternate_prefix.len(); + } + if self.flags & 1 << (FlagSignPlus as uint) != 0 { + actual_len += 1; + } else if !positive { + actual_len += 1; + } + + let mut signprinted = false; + let sign = |this: &mut Formatter| { + if !signprinted { + if this.flags & 1 << (FlagSignPlus as uint) != 0 && positive { + this.buf.write(['+' as u8]); + } else if !positive { + this.buf.write(['-' as u8]); + } + if this.flags & 1 << (FlagAlternate as uint) != 0 { + this.buf.write(alternate_prefix.as_bytes()); + } + signprinted = true; + } + }; + + let emit = |this: &mut Formatter| { + sign(this); + this.buf.write(s); + }; + + match self.width { + None => { emit(self) } + Some(min) if actual_len >= min => { emit(self) } + Some(min) => { + if self.flags & 1 << (FlagSignAwareZeroPad as uint) != 0 { + self.fill = '0'; + sign(self); + } + do self.with_padding(min - actual_len, parse::AlignRight) |me| { + emit(me); + } + } + } + } + + /// This function takes a string slice and emits it to the internal buffer + /// after applying the relevant formatting flags specified. 
The flags + /// recognized for generic strings are: + /// + /// * width - the minimum width of what to emit + /// * fill/align - what to emit and where to emit it if the string + /// provided needs to be padded + /// * precision - the maximum length to emit, the string is truncated if it + /// is longer than this length + /// + /// Notably this function ignored the `flag` parameters + pub fn pad(&mut self, s: &str) { + // Make sure there's a fast path up front + if self.width.is_none() && self.precision.is_none() { + self.buf.write(s.as_bytes()); + return + } + // The `precision` field can be interpreted as a `max-width` for the + // string being formatted + match self.precision { + Some(max) => { + // If there's a maximum width and our string is longer than + // that, then we must always have truncation. This is the only + // case where the maximum length will matter. + let char_len = s.char_len(); + if char_len >= max { + let nchars = ::uint::min(max, char_len); + self.buf.write(s.slice_chars(0, nchars).as_bytes()); + return + } + } + None => {} + } + + // The `width` field is more of a `min-width` parameter at this point. + match self.width { + // If we're under the maximum length, and there's no minimum length + // requirements, then we can just emit the string + None => { self.buf.write(s.as_bytes()) } + + // If we're under the maximum width, check if we're over the minimum + // width, if so it's as easy as just emitting the string. + Some(width) if s.char_len() >= width => { + self.buf.write(s.as_bytes()) + } + + // If we're under both the maximum and the minimum width, then fill + // up the minimum width with the specified string + some alignment. 
+ Some(width) => { + do self.with_padding(width - s.len(), parse::AlignLeft) |me| { + me.buf.write(s.as_bytes()); + } + } + } + } + + fn with_padding(&mut self, padding: uint, + default: parse::Alignment, f: &fn(&mut Formatter)) { + let align = match self.align { + parse::AlignUnknown => default, + parse::AlignLeft | parse::AlignRight => self.align + }; + if align == parse::AlignLeft { + f(self); + } + let mut fill = [0u8, ..4]; + let len = self.fill.encode_utf8(fill); + for _ in range(0, padding) { + self.buf.write(fill.slice_to(len)); + } + if align == parse::AlignRight { + f(self); + } + } } /// This is a function which calls are emitted to by the compiler itself to @@ -279,78 +728,119 @@ impl Bool for bool { impl<'self> String for &'self str { fn fmt(s: & &'self str, f: &mut Formatter) { - // XXX: formatting args - f.buf.write(s.as_bytes()) + f.pad(*s); } } impl Char for char { fn fmt(c: &char, f: &mut Formatter) { - // XXX: formatting args - // XXX: shouldn't require an allocation - let mut s = ~""; - s.push_char(*c); - f.buf.write(s.as_bytes()); + let mut utf8 = [0u8, ..4]; + let amt = c.encode_utf8(utf8); + let s: &str = unsafe { cast::transmute(utf8.slice_to(amt)) }; + String::fmt(&s, f); } } -impl Signed for int { - fn fmt(c: &int, f: &mut Formatter) { - // XXX: formatting args - do int::to_str_bytes(*c, 10) |buf| { - f.buf.write(buf); +macro_rules! int_base(($ty:ident, $into:ident, $base:expr, + $name:ident, $prefix:expr) => { + impl $name for $ty { + fn fmt(c: &$ty, f: &mut Formatter) { + do ::$into::to_str_bytes(*c as $into, $base) |buf| { + f.pad_integral(buf, $prefix, true); + } } } -} - -impl Unsigned for uint { - fn fmt(c: &uint, f: &mut Formatter) { - // XXX: formatting args - do uint::to_str_bytes(*c, 10) |buf| { - f.buf.write(buf); +}) +macro_rules! 
upper_hex(($ty:ident, $into:ident) => { + impl UpperHex for $ty { + fn fmt(c: &$ty, f: &mut Formatter) { + do ::$into::to_str_bytes(*c as $into, 16) |buf| { + upperhex(buf, f); + } } } -} - -impl Octal for uint { - fn fmt(c: &uint, f: &mut Formatter) { - // XXX: formatting args - do uint::to_str_bytes(*c, 8) |buf| { - f.buf.write(buf); +}) +// Not sure why, but this causes an "unresolved enum variant, struct or const" +// when inlined into the above macro... +#[doc(hidden)] +pub fn upperhex(buf: &[u8], f: &mut Formatter) { + let mut local = [0u8, ..16]; + for i in ::iterator::range(0, buf.len()) { + local[i] = match buf[i] as char { + 'a' .. 'f' => (buf[i] - 'a' as u8) + 'A' as u8, + c => c as u8, } } + f.pad_integral(local.slice_to(buf.len()), "0x", true); } -impl LowerHex for uint { - fn fmt(c: &uint, f: &mut Formatter) { - // XXX: formatting args - do uint::to_str_bytes(*c, 16) |buf| { - f.buf.write(buf); +// FIXME(#4375) shouldn't need an inner module +macro_rules! integer(($signed:ident, $unsigned:ident) => { + mod $signed { + use super::*; + + // Signed is special because it actuall emits the negative sign, + // nothing else should do that, however. 
+ impl Signed for $signed { + fn fmt(c: &$signed, f: &mut Formatter) { + do ::$unsigned::to_str_bytes(c.abs() as $unsigned, 10) |buf| { + f.pad_integral(buf, "", *c >= 0); + } + } } + int_base!($signed, $unsigned, 2, Binary, "0b") + int_base!($signed, $unsigned, 8, Octal, "0o") + int_base!($signed, $unsigned, 16, LowerHex, "0x") + upper_hex!($signed, $unsigned) + + int_base!($unsigned, $unsigned, 2, Binary, "0b") + int_base!($unsigned, $unsigned, 8, Octal, "0o") + int_base!($unsigned, $unsigned, 10, Unsigned, "") + int_base!($unsigned, $unsigned, 16, LowerHex, "0x") + upper_hex!($unsigned, $unsigned) } -} - -impl UpperHex for uint { - fn fmt(c: &uint, f: &mut Formatter) { - // XXX: formatting args - do uint::to_str_bytes(*c, 16) |buf| { - let mut local = [0u8, ..16]; - for (l, &b) in local.mut_iter().zip(buf.iter()) { - *l = match b as char { - 'a' .. 'f' => (b - 'a' as u8) + 'A' as u8, - _ => b, - }; - } - f.buf.write(local.slice_to(buf.len())); +}) + +integer!(int, uint) +integer!(i8, u8) +integer!(i16, u16) +integer!(i32, u32) +integer!(i64, u64) + +macro_rules! 
floating(($ty:ident) => { + impl Float for $ty { + fn fmt(f: &$ty, fmt: &mut Formatter) { + // XXX: this shouldn't perform an allocation + let s = match fmt.precision { + Some(i) => ::$ty::to_str_exact(f.abs(), i), + None => ::$ty::to_str_digits(f.abs(), 6) + }; + fmt.pad_integral(s.as_bytes(), "", *f >= 0.0); } } -} +}) +floating!(float) +floating!(f32) +floating!(f64) impl<T> Poly for T { fn fmt(t: &T, f: &mut Formatter) { - // XXX: formatting args - let s = sys::log_str(t); - f.buf.write(s.as_bytes()); + match (f.width, f.precision) { + (None, None) => { + // XXX: sys::log_str should have a variant which takes a stream + // and we should directly call that (avoids unnecessary + // allocations) + let s = sys::log_str(t); + f.buf.write(s.as_bytes()); + } + + // If we have a specified width for formatting, then we have to make + // this allocation of a new string + _ => { + let s = sys::log_str(t); + f.pad(s); + } + } } } @@ -358,9 +848,10 @@ impl<T> Poly for T { // time. impl<T> Pointer for *const T { fn fmt(t: &*const T, f: &mut Formatter) { - // XXX: formatting args - f.buf.write("0x".as_bytes()); - LowerHex::fmt(&(*t as uint), f); + f.flags |= 1 << (parse::FlagAlternate as uint); + do ::uint::to_str_bytes(*t as uint, 16) |buf| { + f.pad_integral(buf, "0x", true); + } } } diff --git a/src/libstd/fmt/parse.rs b/src/libstd/fmt/parse.rs index 673ea1d3fa8..0d39ae84a60 100644 --- a/src/libstd/fmt/parse.rs +++ b/src/libstd/fmt/parse.rs @@ -47,7 +47,7 @@ pub struct FormatSpec<'self> { /// Optionally specified character to fill alignment with fill: Option<char>, /// Optionally specified alignment - align: Option<Alignment>, + align: Alignment, /// Packed version of various flags provided flags: uint, /// The integer precision to use @@ -68,7 +68,7 @@ pub enum Position<'self> { /// Enum of alignments which are supoprted. 
#[deriving(Eq)] -pub enum Alignment { AlignLeft, AlignRight } +pub enum Alignment { AlignLeft, AlignRight, AlignUnknown } /// Various flags which can be applied to format strings, the meaning of these /// flags is defined by the formatters themselves. @@ -77,6 +77,7 @@ pub enum Flag { FlagSignPlus, FlagSignMinus, FlagAlternate, + FlagSignAwareZeroPad, } /// A count is used for the precision and width parameters of an integer, and @@ -288,7 +289,7 @@ impl<'self> Parser<'self> { fn format(&mut self) -> FormatSpec<'self> { let mut spec = FormatSpec { fill: None, - align: None, + align: AlignUnknown, flags: 0, precision: CountImplied, width: CountImplied, @@ -311,9 +312,9 @@ impl<'self> Parser<'self> { } // Alignment if self.consume('<') { - spec.align = Some(AlignLeft); + spec.align = AlignLeft; } else if self.consume('>') { - spec.align = Some(AlignRight); + spec.align = AlignRight; } // Sign flags if self.consume('+') { @@ -326,6 +327,9 @@ impl<'self> Parser<'self> { spec.flags |= 1 << (FlagAlternate as uint); } // Width and precision + if self.consume('0') { + spec.flags |= 1 << (FlagSignAwareZeroPad as uint); + } spec.width = self.count(); if self.consume('.') { if self.consume('*') { @@ -597,7 +601,7 @@ mod tests { fn fmtdflt() -> FormatSpec<'static> { return FormatSpec { fill: None, - align: None, + align: AlignUnknown, flags: 0, precision: CountImplied, width: CountImplied, @@ -656,7 +660,7 @@ mod tests { position: ArgumentIs(3), format: FormatSpec { fill: None, - align: None, + align: AlignUnknown, flags: 0, precision: CountImplied, width: CountImplied, @@ -671,7 +675,7 @@ mod tests { position: ArgumentIs(3), format: FormatSpec { fill: None, - align: Some(AlignRight), + align: AlignRight, flags: 0, precision: CountImplied, width: CountImplied, @@ -683,7 +687,7 @@ mod tests { position: ArgumentIs(3), format: FormatSpec { fill: Some('0'), - align: Some(AlignLeft), + align: AlignLeft, flags: 0, precision: CountImplied, width: CountImplied, @@ -695,7 +699,7 @@ mod 
tests { position: ArgumentIs(3), format: FormatSpec { fill: Some('*'), - align: Some(AlignLeft), + align: AlignLeft, flags: 0, precision: CountImplied, width: CountImplied, @@ -710,7 +714,7 @@ mod tests { position: ArgumentNext, format: FormatSpec { fill: None, - align: None, + align: AlignUnknown, flags: 0, precision: CountImplied, width: CountIs(10), @@ -722,7 +726,7 @@ mod tests { position: ArgumentNext, format: FormatSpec { fill: None, - align: None, + align: AlignUnknown, flags: 0, precision: CountIs(10), width: CountIsParam(10), @@ -734,7 +738,7 @@ mod tests { position: ArgumentNext, format: FormatSpec { fill: None, - align: None, + align: AlignUnknown, flags: 0, precision: CountIsNextParam, width: CountImplied, @@ -746,7 +750,7 @@ mod tests { position: ArgumentNext, format: FormatSpec { fill: None, - align: None, + align: AlignUnknown, flags: 0, precision: CountIsParam(10), width: CountImplied, @@ -761,7 +765,7 @@ mod tests { position: ArgumentNext, format: FormatSpec { fill: None, - align: None, + align: AlignUnknown, flags: (1 << FlagSignMinus as uint), precision: CountImplied, width: CountImplied, @@ -773,7 +777,7 @@ mod tests { position: ArgumentNext, format: FormatSpec { fill: None, - align: None, + align: AlignUnknown, flags: (1 << FlagSignPlus as uint) | (1 << FlagAlternate as uint), precision: CountImplied, width: CountImplied, @@ -788,7 +792,7 @@ mod tests { position: ArgumentIs(3), format: FormatSpec { fill: None, - align: None, + align: AlignUnknown, flags: 0, precision: CountImplied, width: CountImplied, diff --git a/src/libstd/fmt/rt.rs b/src/libstd/fmt/rt.rs index 6feb1d7a848..90763836fc6 100644 --- a/src/libstd/fmt/rt.rs +++ b/src/libstd/fmt/rt.rs @@ -36,7 +36,7 @@ pub struct Argument<'self> { pub struct FormatSpec { fill: char, - alignleft: bool, + align: parse::Alignment, flags: uint, precision: parse::Count, width: parse::Count, diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 886e4d86ab6..b8e61536941 100644 --- a/src/libstd/str.rs 
+++ b/src/libstd/str.rs @@ -33,6 +33,7 @@ use ptr; use ptr::RawPtr; use to_str::ToStr; use uint; +use unstable::raw::{Repr, Slice}; use vec; use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector}; @@ -758,15 +759,7 @@ macro_rules! utf8_acc_cont_byte( ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as uint) ) -// UTF-8 tags and ranges static TAG_CONT_U8: u8 = 128u8; -static TAG_CONT: uint = 128u; -static MAX_ONE_B: uint = 128u; -static TAG_TWO_B: uint = 192u; -static MAX_TWO_B: uint = 2048u; -static TAG_THREE_B: uint = 224u; -static MAX_THREE_B: uint = 65536u; -static TAG_FOUR_B: uint = 240u; static MAX_UNICODE: uint = 1114112u; /// Unsafe operations @@ -1988,40 +1981,18 @@ impl OwnedStr for ~str { #[inline] fn push_char(&mut self, c: char) { assert!((c as uint) < MAX_UNICODE); // FIXME: #7609: should be enforced on all `char` + let cur_len = self.len(); + self.reserve_at_least(cur_len + 4); // may use up to 4 bytes + + // Attempt to not use an intermediate buffer by just pushing bytes + // directly onto this string. 
unsafe { - let code = c as uint; - let nb = if code < MAX_ONE_B { 1u } - else if code < MAX_TWO_B { 2u } - else if code < MAX_THREE_B { 3u } - else { 4u }; - let len = self.len(); - let new_len = len + nb; - self.reserve_at_least(new_len); - let off = len as int; - do self.as_mut_buf |buf, _len| { - match nb { - 1u => { - *ptr::mut_offset(buf, off) = code as u8; - } - 2u => { - *ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8; - *ptr::mut_offset(buf, off + 1) = (code & 63u | TAG_CONT) as u8; - } - 3u => { - *ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8; - *ptr::mut_offset(buf, off + 1) = (code >> 6u & 63u | TAG_CONT) as u8; - *ptr::mut_offset(buf, off + 2) = (code & 63u | TAG_CONT) as u8; - } - 4u => { - *ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8; - *ptr::mut_offset(buf, off + 1) = (code >> 12u & 63u | TAG_CONT) as u8; - *ptr::mut_offset(buf, off + 2) = (code >> 6u & 63u | TAG_CONT) as u8; - *ptr::mut_offset(buf, off + 3) = (code & 63u | TAG_CONT) as u8; - } - _ => {} - } - } - raw::set_len(self, new_len); + let v = self.repr(); + let len = c.encode_utf8(cast::transmute(Slice { + data: ((&(*v).data) as *u8).offset(cur_len as int), + len: 4, + })); + raw::set_len(self, cur_len + len); } } diff --git a/src/libstd/unstable/raw.rs b/src/libstd/unstable/raw.rs index 0e074b53d6b..bdf84604fb3 100644 --- a/src/libstd/unstable/raw.rs +++ b/src/libstd/unstable/raw.rs @@ -56,6 +56,7 @@ impl<'self, T> Repr<Slice<T>> for &'self [T] {} impl<'self> Repr<Slice<u8>> for &'self str {} impl<T> Repr<*Box<T>> for @T {} impl<T> Repr<*Box<Vec<T>>> for @[T] {} +impl Repr<*String> for ~str {} // sure would be nice to have this // impl<T> Repr<*Vec<T>> for ~[T] {} |
