diff options
| author | Alex Crichton <alex@alexcrichton.com> | 2013-07-29 01:12:41 -0700 |
|---|---|---|
| committer | Alex Crichton <alex@alexcrichton.com> | 2013-08-07 19:21:43 -0700 |
| commit | ffb670ffcd69ed8e7cd13a7f06375ede752349e2 (patch) | |
| tree | a3e82ac51c713a80d620128813b4fa653451da97 /src/libstd | |
| parent | 5b4244d917cc9341b1ec04c4e245d5f841d3facc (diff) | |
| download | rust-ffb670ffcd69ed8e7cd13a7f06375ede752349e2.tar.gz rust-ffb670ffcd69ed8e7cd13a7f06375ede752349e2.zip | |
Add initial support for a new formatting syntax
The new macro is available under the name ifmt! (only an intermediate name)
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/either.rs | 2 | ||||
| -rw-r--r-- | src/libstd/fmt/mod.rs | 368 | ||||
| -rw-r--r-- | src/libstd/fmt/parse.rs | 896 | ||||
| -rw-r--r-- | src/libstd/fmt/rt.rs | 62 | ||||
| -rw-r--r-- | src/libstd/rt/io/mem.rs | 2 | ||||
| -rw-r--r-- | src/libstd/std.rs | 3 |
6 files changed, 1331 insertions, 2 deletions
diff --git a/src/libstd/either.rs b/src/libstd/either.rs index cfaef550c6f..bb74d9b3ec4 100644 --- a/src/libstd/either.rs +++ b/src/libstd/either.rs @@ -24,7 +24,7 @@ use vec; use vec::{OwnedVector, ImmutableVector}; /// `Either` is a type that represents one of two alternatives -#[deriving(Clone, Eq)] +#[deriving(Clone, Eq, IterBytes)] pub enum Either<L, R> { Left(L), Right(R) diff --git a/src/libstd/fmt/mod.rs b/src/libstd/fmt/mod.rs new file mode 100644 index 00000000000..2b8807b2291 --- /dev/null +++ b/src/libstd/fmt/mod.rs @@ -0,0 +1,368 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use prelude::*; + +use cast; +use int; +use rt::io::Decorator; +use rt::io::mem::MemWriter; +use rt::io; +use str; +use sys; +use uint; +use util; +use vec; + +pub mod parse; +pub mod rt; + +/// A struct to represent both where to emit formatting strings to and how they +/// should be formatted. A mutable version of this is passed to all formatting +/// traits. +pub struct Formatter<'self> { + /// Flags for formatting (packed version of rt::Flag) + flags: uint, + /// Character used as 'fill' whenever there is alignment + fill: char, + /// Boolean indication of whether the output should be left-aligned + alignleft: bool, + /// Optionally specified integer width that the output should be + width: Option<uint>, + /// Optionally specified precision for numeric types + precision: Option<uint>, + + /// Output buffer. + buf: &'self mut io::Writer, + + priv curarg: vec::VecIterator<'self, Argument<'self>>, + priv args: &'self [Argument<'self>], +} + +/// This struct represents the generic "argument" which is taken by the Xprintf +/// family of functions. It contains a function to format the given value. At +/// compile time it is ensured that the function and the value have the correct +/// types, and then this struct is used to canonicalize arguments to one type. +pub struct Argument<'self> { + priv formatter: extern "Rust" fn(&util::Void, &mut Formatter), + priv value: &'self util::Void, +} + +#[allow(missing_doc)] +pub trait Bool { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Char { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Signed { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Unsigned { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Octal { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Binary { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait LowerHex { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait UpperHex { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait String { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Poly { fn fmt(&Self, &mut Formatter); } +#[allow(missing_doc)] +pub trait Pointer { fn fmt(&Self, &mut Formatter); } + +/// The sprintf function takes a precompiled format string and a list of +/// arguments, to return the resulting formatted string. +/// +/// This is currently an unsafe function because the types of all arguments +/// aren't verified by immediate callers of this function. This currently does +/// not validate that the correct types of arguments are specified for each +/// format specifier, nor that each argument itself contains the right function +/// for formatting the right type value. Because of this, the function is marked +/// as `unsafe` if this is being called manually. +/// +/// Thankfully the rust compiler provides the macro `ifmt!` which will perform +/// all of this validation at compile-time and provides a safe interface for +/// invoking this function. +/// +/// # Arguments +/// +/// * fmts - the precompiled format string to emit. +/// * args - the list of arguments to the format string. These are only the +/// positional arguments (not named) +/// +/// Note that this function assumes that there are enough arguments for the +/// format string. +pub unsafe fn sprintf(fmt: &[rt::Piece], args: &[Argument]) -> ~str { + let output = MemWriter::new(); + { + let mut formatter = Formatter { + flags: 0, + width: None, + precision: None, + // FIXME(#8248): shouldn't need a transmute + buf: cast::transmute(&output as &io::Writer), + alignleft: false, + fill: ' ', + args: args, + curarg: args.iter(), + }; + for piece in fmt.iter() { + formatter.run(piece, None); + } + } + return str::from_bytes_owned(output.inner()); +} + +impl<'self> Formatter<'self> { + fn run(&mut self, piece: &rt::Piece, cur: Option<&str>) { + let setcount = |slot: &mut Option<uint>, cnt: &parse::Count| { + match *cnt { + parse::CountIs(n) => { *slot = Some(n); } + parse::CountImplied => { *slot = None; } + parse::CountIsParam(i) => { + let v = self.args[i].value; + unsafe { *slot = Some(*(v as *util::Void as *uint)); } + } + parse::CountIsNextParam => { + let v = self.curarg.next().unwrap().value; + unsafe { *slot = Some(*(v as *util::Void as *uint)); } + } + } + }; + + match *piece { + rt::String(s) => { self.buf.write(s.as_bytes()); } + rt::CurrentArgument(()) => { self.buf.write(cur.unwrap().as_bytes()); } + rt::Argument(ref arg) => { + // Fill in the format parameters into the formatter + self.fill = arg.format.fill; + self.alignleft = arg.format.alignleft; + self.flags = arg.format.flags; + setcount(&mut self.width, &arg.format.width); + setcount(&mut self.precision, &arg.format.precision); + + // Extract the correct argument + let value = match arg.position { + rt::ArgumentNext => { *self.curarg.next().unwrap() } + rt::ArgumentIs(i) => self.args[i], + }; + + // Then actually do some printing + match arg.method { + None => { (value.formatter)(value.value, self); } + Some(ref method) => { self.execute(*method, value); } + } + } + } + } + + fn execute(&mut self, method: &rt::Method, arg: Argument) { + match *method { + // Pluralization is selection upon a numeric value specified as the + // parameter. + rt::Plural(offset, ref selectors, ref default) => { + // This is validated at compile-time to be a pointer to a + // '&uint' value. + let value: &uint = unsafe { cast::transmute(arg.value) }; + let value = *value; + + // First, attempt to match against explicit values without the + // offsetted value + for s in selectors.iter() { + match s.selector { + Right(val) if value == val => { + return self.runplural(value, s.result); + } + _ => {} + } + } + + // Next, offset the value and attempt to match against the + // keyword selectors. + let value = value - match offset { Some(i) => i, None => 0 }; + for s in selectors.iter() { + let run = match s.selector { + Left(parse::Zero) => value == 0, + Left(parse::One) => value == 1, + Left(parse::Two) => value == 2, + + // XXX: Few/Many should have a user-specified boundary + // One possible option would be in the function + // pointer of the 'arg: Argument' struct. + Left(parse::Few) => value < 8, + Left(parse::Many) => value >= 8, + + Right(*) => false + }; + if run { + return self.runplural(value, s.result); + } + } + + self.runplural(value, *default); + } + + // Select is just a matching against the string specified. + rt::Select(ref selectors, ref default) => { + // This is validated at compile-time to be a pointer to a + // string slice, + let value: & &str = unsafe { cast::transmute(arg.value) }; + let value = *value; + + for s in selectors.iter() { + if s.selector == value { + for piece in s.result.iter() { + self.run(piece, Some(value)); + } + return; + } + } + for piece in default.iter() { + self.run(piece, Some(value)); + } + } + } + } + + fn runplural(&mut self, value: uint, pieces: &[rt::Piece]) { + do uint::to_str_bytes(value, 10) |buf| { + let valuestr = str::from_bytes_slice(buf); + for piece in pieces.iter() { + self.run(piece, Some(valuestr)); + } + } + } +} + +/// This is a function which calls are emitted to by the compiler itself to +/// create the Argument structures that are passed into the `sprintf` function. +#[doc(hidden)] +pub fn argument<'a, T>(f: extern "Rust" fn(&T, &mut Formatter), + t: &'a T) -> Argument<'a> { + unsafe { + Argument { + formatter: cast::transmute(f), + value: cast::transmute(t) + } + } +} + +/// When the compiler determines that the type of an argument *must* be a string +/// (such as for select), then it invokes this method. +#[doc(hidden)] +pub fn argumentstr<'a>(s: &'a &str) -> Argument<'a> { + argument(String::fmt, s) +} + +/// When the compiler determines that the type of an argument *must* be a uint +/// (such as for plural), then it invokes this method. +#[doc(hidden)] +pub fn argumentuint<'a>(s: &'a uint) -> Argument<'a> { + argument(Unsigned::fmt, s) +} + +// Implementations of the core formatting traits + +impl Bool for bool { + fn fmt(b: &bool, f: &mut Formatter) { + String::fmt(&(if *b {"true"} else {"false"}), f); + } +} + +impl<'self> String for &'self str { + fn fmt(s: & &'self str, f: &mut Formatter) { + // XXX: formatting args + f.buf.write(s.as_bytes()) + } +} + +impl Char for char { + fn fmt(c: &char, f: &mut Formatter) { + // XXX: formatting args + // XXX: shouldn't require an allocation + let mut s = ~""; + s.push_char(*c); + f.buf.write(s.as_bytes()); + } +} + +impl Signed for int { + fn fmt(c: &int, f: &mut Formatter) { + // XXX: formatting args + do int::to_str_bytes(*c, 10) |buf| { + f.buf.write(buf); + } + } +} + +impl Unsigned for uint { + fn fmt(c: &uint, f: &mut Formatter) { + // XXX: formatting args + do uint::to_str_bytes(*c, 10) |buf| { + f.buf.write(buf); + } + } +} + +impl Octal for uint { + fn fmt(c: &uint, f: &mut Formatter) { + // XXX: formatting args + do uint::to_str_bytes(*c, 8) |buf| { + f.buf.write(buf); + } + } +} + +impl LowerHex for uint { + fn fmt(c: &uint, f: &mut Formatter) { + // XXX: formatting args + do uint::to_str_bytes(*c, 16) |buf| { + f.buf.write(buf); + } + } +} + +impl UpperHex for uint { + fn fmt(c: &uint, f: &mut Formatter) { + // XXX: formatting args + do uint::to_str_bytes(*c, 16) |buf| { + let mut local = [0u8, ..16]; + for (l, &b) in local.mut_iter().zip(buf.iter()) { + *l = match b as char { + 'a' .. 'f' => (b - 'a' as u8) + 'A' as u8, + _ => b, + }; + } + f.buf.write(local.slice_to(buf.len())); + } + } +} + +impl<T> Poly for T { + fn fmt(t: &T, f: &mut Formatter) { + // XXX: formatting args + let s = sys::log_str(t); + f.buf.write(s.as_bytes()); + } +} + +// n.b. use 'const' to get an implementation for both '*mut' and '*' at the same +// time. +impl<T> Pointer for *const T { + fn fmt(t: &*const T, f: &mut Formatter) { + // XXX: formatting args + f.buf.write("0x".as_bytes()); + LowerHex::fmt(&(*t as uint), f); + } +} + +// If you expected tests to be here, look instead at the run-pass/ifmt.rs test, +// it's a lot easier than creating all of the rt::Piece structures here. diff --git a/src/libstd/fmt/parse.rs b/src/libstd/fmt/parse.rs new file mode 100644 index 00000000000..673ea1d3fa8 --- /dev/null +++ b/src/libstd/fmt/parse.rs @@ -0,0 +1,896 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use prelude::*; + +use char; +use str; +use iterator; + +condition! { pub parse_error: ~str -> (); } + +/// A piece is a portion of the format string which represents the next part to +/// emit. These are emitted as a stream by the `Parser` class. +#[deriving(Eq)] +pub enum Piece<'self> { + /// A literal string which should directly be emitted + String(&'self str), + /// A back-reference to whatever the current argument is. This is used + /// inside of a method call to refer back to the original argument. + CurrentArgument, + /// This describes that formatting should process the next argument (as + /// specified inside) for emission. + Argument(Argument<'self>), +} + +/// Representation of an argument specification. +#[deriving(Eq)] +pub struct Argument<'self> { + /// Where to find this argument + position: Position<'self>, + /// How to format the argument + format: FormatSpec<'self>, + /// If not `None`, what method to invoke on the argument + method: Option<~Method<'self>> +} + +/// Specification for the formatting of an argument in the format string. +#[deriving(Eq)] +pub struct FormatSpec<'self> { + /// Optionally specified character to fill alignment with + fill: Option<char>, + /// Optionally specified alignment + align: Option<Alignment>, + /// Packed version of various flags provided + flags: uint, + /// The integer precision to use + precision: Count, + /// The string width requested for the resulting format + width: Count, + /// The descriptor string representing the name of the format desired for + /// this argument, this can be empty or any number of characters, although + /// it is required to be one word. + ty: &'self str +} + +/// Enum describing where an argument for a format can be located. +#[deriving(Eq)] +pub enum Position<'self> { + ArgumentNext, ArgumentIs(uint), ArgumentNamed(&'self str) +} + +/// Enum of alignments which are supoprted. +#[deriving(Eq)] +pub enum Alignment { AlignLeft, AlignRight } + +/// Various flags which can be applied to format strings, the meaning of these +/// flags is defined by the formatters themselves. +#[deriving(Eq)] +pub enum Flag { + FlagSignPlus, + FlagSignMinus, + FlagAlternate, +} + +/// A count is used for the precision and width parameters of an integer, and +/// can reference either an argument or a literal integer. +#[deriving(Eq)] +pub enum Count { + CountIs(uint), + CountIsParam(uint), + CountIsNextParam, + CountImplied, +} + +/// Enum describing all of the possible methods which the formatting language +/// currently supports. +#[deriving(Eq)] +pub enum Method<'self> { + /// A plural method selects on an integer over a list of either integer or + /// keyword-defined clauses. The meaning of the keywords is defined by the + /// current locale. + /// + /// An offset is optionally present at the beginning which is used to match + /// against keywords, but it is not matched against the literal integers. + /// + /// The final element of this enum is the default "other" case which is + /// always required to be specified. + Plural(Option<uint>, ~[PluralArm<'self>], ~[Piece<'self>]), + + /// A select method selects over a string. Each arm is a different string + /// which can be selected for. + /// + /// As with `Plural`, a default "other" case is required as well. + Select(~[SelectArm<'self>], ~[Piece<'self>]), +} + +/// Structure representing one "arm" of the `plural` function. +#[deriving(Eq)] +pub struct PluralArm<'self> { + /// A selector can either be specified by a keyword or with an integer + /// literal. + selector: Either<PluralKeyword, uint>, + /// Array of pieces which are the format of this arm + result: ~[Piece<'self>], +} + +/// Enum of the 5 CLDR plural keywords. There is one more, "other", but that is +/// specially placed in the `Plural` variant of `Method` +/// +/// http://www.icu-project.org/apiref/icu4c/classicu_1_1PluralRules.html +#[deriving(Eq, IterBytes)] +pub enum PluralKeyword { + Zero, One, Two, Few, Many +} + +/// Structure representing one "arm" of the `select` function. +#[deriving(Eq)] +pub struct SelectArm<'self> { + /// String selector which guards this arm + selector: &'self str, + /// Array of pieces which are the format of this arm + result: ~[Piece<'self>], +} + +/// The parser structure for interpreting the input format string. This is +/// modelled as an iterator over `Piece` structures to form a stream of tokens +/// being output. +/// +/// This is a recursive-descent parser for the sake of simplicity, and if +/// necessary there's probably lots of room for improvement performance-wise. +pub struct Parser<'self> { + priv input: &'self str, + priv cur: str::CharOffsetIterator<'self>, +} + +impl<'self> iterator::Iterator<Piece<'self>> for Parser<'self> { + fn next(&mut self) -> Option<Piece<'self>> { + match self.cur.clone().next() { + Some((_, '#')) => { self.cur.next(); Some(CurrentArgument) } + Some((_, '{')) => { + self.cur.next(); + let ret = Some(Argument(self.argument())); + if !self.consume('}') { + self.err(~"unterminated format string"); + } + ret + } + Some((pos, '\\')) => { + self.cur.next(); + self.escape(); // ensure it's a valid escape sequence + Some(String(self.string(pos + 1))) // skip the '\' character + } + Some((_, '}')) | None => { None } + Some((pos, _)) => { + Some(String(self.string(pos))) + } + } + } +} + +impl<'self> Parser<'self> { + /// Creates a new parser for the given format string + pub fn new<'a>(s: &'a str) -> Parser<'a> { + Parser { + input: s, + cur: s.char_offset_iter(), + } + } + + /// Notifies of an error. The message doesn't actually need to be of type + /// ~str, but I think it does when this eventually uses conditions so it + /// might as well start using it now. + fn err(&self, msg: ~str) { + parse_error::cond.raise(msg); + } + + /// Optionally consumes the specified character. If the character is not at + /// the current position, then the current iterator isn't moved and false is + /// returned, otherwise the character is consumed and true is returned. + fn consume(&mut self, c: char) -> bool { + match self.cur.clone().next() { + Some((_, maybe)) if c == maybe => { + self.cur.next(); + true + } + Some(*) | None => false, + } + } + + /// Attempts to consume any amount of whitespace followed by a character + fn wsconsume(&mut self, c: char) -> bool { + self.ws(); self.consume(c) + } + + /// Consumes all whitespace characters until the first non-whitespace + /// character + fn ws(&mut self) { + loop { + match self.cur.clone().next() { + Some((_, c)) if char::is_whitespace(c) => { self.cur.next(); } + Some(*) | None => { return } + } + } + } + + /// Consumes an escape sequence, failing if there is not a valid character + /// to be escaped. + fn escape(&mut self) -> char { + match self.cur.next() { + Some((_, c @ '#')) | Some((_, c @ '{')) | + Some((_, c @ '\\')) | Some((_, c @ '}')) => { c } + Some((_, c)) => { + self.err(fmt!("invalid escape character `%c`", c)); + c + } + None => { + self.err(~"expected an escape sequence, but format string was \ + terminated"); + ' ' + } + } + } + + /// Parses all of a string which is to be considered a "raw literal" in a + /// format string. This is everything outside of the braces. + fn string(&mut self, start: uint) -> &'self str { + loop { + // we may not consume the character, so clone the iterator + match self.cur.clone().next() { + Some((pos, '\\')) | Some((pos, '#')) | + Some((pos, '}')) | Some((pos, '{')) => { + return self.input.slice(start, pos); + } + Some(*) => { self.cur.next(); } + None => { + self.cur.next(); + return self.input.slice(start, self.input.len()); + } + } + } + } + + /// Parses an Argument structure, or what's contained within braces inside + /// the format string + fn argument(&mut self) -> Argument<'self> { + Argument { + position: self.position(), + format: self.format(), + method: self.method(), + } + } + + /// Parses a positional argument for a format. This could either be an + /// integer index of an argument, a named argument, or a blank string. + fn position(&mut self) -> Position<'self> { + match self.integer() { + Some(i) => { ArgumentIs(i) } + None => { + match self.cur.clone().next() { + Some((_, c)) if char::is_alphabetic(c) => { + ArgumentNamed(self.word()) + } + _ => ArgumentNext + } + } + } + } + + /// Parses a format specifier at the current position, returning all of the + /// relevant information in the FormatSpec struct. + fn format(&mut self) -> FormatSpec<'self> { + let mut spec = FormatSpec { + fill: None, + align: None, + flags: 0, + precision: CountImplied, + width: CountImplied, + ty: self.input.slice(0, 0), + }; + if !self.consume(':') { return spec } + + // fill character + match self.cur.clone().next() { + Some((_, c)) => { + match self.cur.clone().skip(1).next() { + Some((_, '>')) | Some((_, '<')) => { + spec.fill = Some(c); + self.cur.next(); + } + Some(*) | None => {} + } + } + None => {} + } + // Alignment + if self.consume('<') { + spec.align = Some(AlignLeft); + } else if self.consume('>') { + spec.align = Some(AlignRight); + } + // Sign flags + if self.consume('+') { + spec.flags |= 1 << (FlagSignPlus as uint); + } else if self.consume('-') { + spec.flags |= 1 << (FlagSignMinus as uint); + } + // Alternate marker + if self.consume('#') { + spec.flags |= 1 << (FlagAlternate as uint); + } + // Width and precision + spec.width = self.count(); + if self.consume('.') { + if self.consume('*') { + spec.precision = CountIsNextParam; + } else { + spec.precision = self.count(); + } + } + // Finally the actual format specifier + spec.ty = self.word(); + return spec; + } + + /// Parses a method to be applied to the previously specified argument and + /// its format. The two current supported methods are 'plural' and 'select' + fn method(&mut self) -> Option<~Method<'self>> { + if !self.wsconsume(',') { + return None; + } + self.ws(); + match self.word() { + "select" => { + if !self.wsconsume(',') { + self.err(~"`select` must be followed by `,`"); + } + Some(self.select()) + } + "plural" => { + if !self.wsconsume(',') { + self.err(~"`plural` must be followed by `,`"); + } + Some(self.plural()) + } + "" => { + self.err(~"expected method after comma"); + return None; + } + method => { + self.err(fmt!("unknown method: `%s`", method)); + return None; + } + } + } + + /// Parses a 'select' statement (after the initial 'select' word) + fn select(&mut self) -> ~Method<'self> { + let mut other = None; + let mut arms = ~[]; + // Consume arms one at a time + loop { + self.ws(); + let selector = self.word(); + if selector == "" { + self.err(~"cannot have an empty selector"); + break + } + if !self.wsconsume('{') { + self.err(~"selector must be followed by `{`"); + } + let pieces = self.collect(); + if !self.wsconsume('}') { + self.err(~"selector case must be terminated by `}`"); + } + if selector == "other" { + if !other.is_none() { + self.err(~"multiple `other` statements in `select"); + } + other = Some(pieces); + } else { + arms.push(SelectArm { selector: selector, result: pieces }); + } + self.ws(); + match self.cur.clone().next() { + Some((_, '}')) => { break } + Some(*) | None => {} + } + } + // The "other" selector must be present + let other = match other { + Some(arm) => { arm } + None => { + self.err(~"`select` statement must provide an `other` case"); + ~[] + } + }; + ~Select(arms, other) + } + + /// Parses a 'plural' statement (after the initial 'plural' word) + fn plural(&mut self) -> ~Method<'self> { + let mut offset = None; + let mut other = None; + let mut arms = ~[]; + + // First, attempt to parse the 'offset:' field. We know the set of + // selector words which can appear in plural arms, and the only ones + // which start with 'o' are "other" and "offset", hence look two + // characters deep to see if we can consume the word "offset" + self.ws(); + let mut it = self.cur.clone(); + match it.next() { + Some((_, 'o')) => { + match it.next() { + Some((_, 'f')) => { + let word = self.word(); + if word != "offset" { + self.err(fmt!("expected `offset`, found `%s`", + word)); + } else { + if !self.consume(':') { + self.err(~"`offset` must be followed by `:`"); + } + match self.integer() { + Some(i) => { offset = Some(i); } + None => { + self.err(~"offset must be an integer"); + } + } + } + } + Some(*) | None => {} + } + } + Some(*) | None => {} + } + + // Next, generate all the arms + loop { + let mut isother = false; + let selector = if self.wsconsume('=') { + match self.integer() { + Some(i) => Right(i), + None => { + self.err(~"plural `=` selectors must be followed by an \ + integer"); + Right(0) + } + } + } else { + let word = self.word(); + match word { + "other" => { isother = true; Left(Zero) } + "zero" => Left(Zero), + "one" => Left(One), + "two" => Left(Two), + "few" => Left(Few), + "many" => Left(Many), + word => { + self.err(fmt!("unexpected plural selector `%s`", word)); + if word == "" { + break + } else { + Left(Zero) + } + } + } + }; + if !self.wsconsume('{') { + self.err(~"selector must be followed by `{`"); + } + let pieces = self.collect(); + if !self.wsconsume('}') { + self.err(~"selector case must be terminated by `}`"); + } + if isother { + if !other.is_none() { + self.err(~"multiple `other` statements in `select"); + } + other = Some(pieces); + } else { + arms.push(PluralArm { selector: selector, result: pieces }); + } + self.ws(); + match self.cur.clone().next() { + Some((_, '}')) => { break } + Some(*) | None => {} + } + } + + let other = match other { + Some(arm) => { arm } + None => { + self.err(~"`plural` statement must provide an `other` case"); + ~[] + } + }; + ~Plural(offset, arms, other) + } + + /// Parses a Count parameter at the current position. This does not check + /// for 'CountIsNextParam' because that is only used in precision, not + /// width. + fn count(&mut self) -> Count { + match self.integer() { + Some(i) => { + if self.consume('$') { + CountIsParam(i) + } else { + CountIs(i) + } + } + None => { CountImplied } + } + } + + /// Parses a word starting at the current position. A word is considered to + /// be an alphabetic character followed by any number of alphanumeric + /// characters. + fn word(&mut self) -> &'self str { + let start = match self.cur.clone().next() { + Some((pos, c)) if char::is_alphabetic(c) => { + self.cur.next(); + pos + } + Some(*) | None => { return self.input.slice(0, 0); } + }; + let mut end; + loop { + match self.cur.clone().next() { + Some((_, c)) if char::is_alphanumeric(c) => { + self.cur.next(); + } + Some((pos, _)) => { end = pos; break } + None => { end = self.input.len(); break } + } + } + self.input.slice(start, end) + } + + /// Optionally parses an integer at the current position. This doesn't deal + /// with overflow at all, it's just accumulating digits. + fn integer(&mut self) -> Option<uint> { + let mut cur = 0; + let mut found = false; + loop { + match self.cur.clone().next() { + Some((_, c)) => { + match char::to_digit(c, 10) { + Some(i) => { + cur = cur * 10 + i; + found = true; + self.cur.next(); + } + None => { break } + } + } + None => { break } + } + } + if found { + return Some(cur); + } else { + return None; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use prelude::*; + use realstd::fmt::{String}; + + fn same(fmt: &'static str, p: ~[Piece<'static>]) { + let mut parser = Parser::new(fmt); + assert_eq!(p, parser.collect()); + } + + fn fmtdflt() -> FormatSpec<'static> { + return FormatSpec { + fill: None, + align: None, + flags: 0, + precision: CountImplied, + width: CountImplied, + ty: "", + } + } + + fn musterr(s: &str) { + Parser::new(s).next(); + } + + #[test] + fn simple() { + same("asdf", ~[String("asdf")]); + same("a\\{b", ~[String("a"), String("{b")]); + same("a\\#b", ~[String("a"), String("#b")]); + same("a\\}b", ~[String("a"), String("}b")]); + same("a\\}", ~[String("a"), String("}")]); + same("\\}", ~[String("}")]); + } + + #[test] #[should_fail] fn invalid01() { musterr("{") } + #[test] #[should_fail] fn invalid02() { musterr("\\") } + #[test] #[should_fail] fn invalid03() { musterr("\\a") } + #[test] #[should_fail] fn invalid04() { musterr("{3a}") } + #[test] #[should_fail] fn invalid05() { musterr("{:|}") } + #[test] #[should_fail] fn invalid06() { musterr("{:>>>}") } + + #[test] + fn format_nothing() { + same("{}", ~[Argument(Argument { + position: ArgumentNext, + format: fmtdflt(), + method: None, + })]); + } + #[test] + fn format_position() { + same("{3}", ~[Argument(Argument { + position: ArgumentIs(3), + format: fmtdflt(), + method: None, + })]); + } + #[test] + fn format_position_nothing_else() { + same("{3:}", ~[Argument(Argument { + position: ArgumentIs(3), + format: fmtdflt(), + method: None, + })]); + } + #[test] + fn format_type() { + same("{3:a}", ~[Argument(Argument { + position: ArgumentIs(3), + format: FormatSpec { + fill: None, + align: None, + flags: 0, + precision: CountImplied, + width: CountImplied, + ty: "a", + }, + method: None, + })]); + } + #[test] + fn format_align_fill() { + same("{3:>}", ~[Argument(Argument { + position: ArgumentIs(3), + format: FormatSpec { + fill: None, + align: Some(AlignRight), + flags: 0, + precision: CountImplied, + width: CountImplied, + ty: "", + }, + method: None, + })]); + same("{3:0<}", ~[Argument(Argument { + position: ArgumentIs(3), + format: FormatSpec { + fill: Some('0'), + align: Some(AlignLeft), + flags: 0, + precision: CountImplied, + width: CountImplied, + ty: "", + }, + method: None, + })]); + same("{3:*<abcd}", ~[Argument(Argument { + position: ArgumentIs(3), + format: FormatSpec { + fill: Some('*'), + align: Some(AlignLeft), + flags: 0, + precision: CountImplied, + width: CountImplied, + ty: "abcd", + }, + method: None, + })]); + } + #[test] + fn format_counts() { + same("{:10s}", ~[Argument(Argument { + position: ArgumentNext, + format: FormatSpec { + fill: None, + align: None, + flags: 0, + precision: CountImplied, + width: CountIs(10), + ty: "s", + }, + method: None, + })]); + same("{:10$.10s}", ~[Argument(Argument { + position: ArgumentNext, + format: FormatSpec { + fill: None, + align: None, + flags: 0, + precision: CountIs(10), + width: CountIsParam(10), + ty: "s", + }, + method: None, + })]); + same("{:.*s}", ~[Argument(Argument { + position: ArgumentNext, + format: FormatSpec { + fill: None, + align: None, + flags: 0, + precision: CountIsNextParam, + width: CountImplied, + ty: "s", + }, + method: None, + })]); + same("{:.10$s}", ~[Argument(Argument { + position: ArgumentNext, + format: FormatSpec { + fill: None, + align: None, + flags: 0, + precision: CountIsParam(10), + width: CountImplied, + ty: "s", + }, + method: None, + })]); + } + #[test] + fn format_flags() { + same("{:-}", ~[Argument(Argument { + position: ArgumentNext, + format: FormatSpec { + fill: None, + align: None, + flags: (1 << FlagSignMinus as uint), + precision: CountImplied, + width: CountImplied, + ty: "", + }, + method: None, + })]); + same("{:+#}", ~[Argument(Argument { + position: ArgumentNext, + format: FormatSpec { + fill: None, + align: None, + flags: (1 << FlagSignPlus as uint) | (1 << FlagAlternate as uint), + precision: CountImplied, + width: CountImplied, + ty: "", + }, + method: None, + })]); + } + #[test] + fn format_mixture() { + same("abcd {3:a} efg", ~[String("abcd "), Argument(Argument { + position: ArgumentIs(3), + format: FormatSpec { + fill: None, + align: None, + flags: 0, + precision: CountImplied, + width: CountImplied, + ty: "a", + }, + method: None, + }), String(" efg")]); + } + + #[test] + fn select_simple() { + same("{, select, other { haha } }", ~[Argument(Argument{ + position: ArgumentNext, + format: fmtdflt(), + method: Some(~Select(~[], ~[String(" haha ")])) + })]); + same("{1, select, other { haha } }", ~[Argument(Argument{ + position: ArgumentIs(1), + format: fmtdflt(), + method: Some(~Select(~[], ~[String(" haha ")])) + })]); + same("{1, select, other {#} }", ~[Argument(Argument{ + position: ArgumentIs(1), + format: fmtdflt(), + method: Some(~Select(~[], ~[CurrentArgument])) + })]); + same("{1, select, other {{2, select, other {lol}}} }", ~[Argument(Argument{ + position: ArgumentIs(1), + format: fmtdflt(), + method: Some(~Select(~[], ~[Argument(Argument{ + position: ArgumentIs(2), + format: fmtdflt(), + method: Some(~Select(~[], ~[String("lol")])) + })])) // wat + })]); + } + + #[test] + fn select_cases() { + same("{1, select, a{1} b{2} c{3} other{4} }", ~[Argument(Argument{ + position: ArgumentIs(1), + format: fmtdflt(), + method: Some(~Select(~[ + SelectArm{ selector: "a", result: ~[String("1")] }, + SelectArm{ selector: "b", result: ~[String("2")] }, + SelectArm{ selector: "c", result: ~[String("3")] }, + ], ~[String("4")])) + })]); + } + + #[test] #[should_fail] fn badselect01() { + musterr("{select, }") + } + #[test] #[should_fail] fn badselect02() { + musterr("{1, select}") + } + #[test] #[should_fail] fn badselect03() { + musterr("{1, select, }") + } + #[test] #[should_fail] fn badselect04() { + musterr("{1, select, a {}}") + } + #[test] #[should_fail] fn badselect05() { + musterr("{1, select, other }}") + } + #[test] #[should_fail] fn badselect06() { + musterr("{1, select, other {}") + } + #[test] #[should_fail] fn badselect07() { + musterr("{select, other {}") + } + #[test] #[should_fail] fn badselect08() { + musterr("{1 select, other {}") + } + #[test] #[should_fail] fn badselect09() { + musterr("{:d select, other {}") + } + #[test] #[should_fail] fn badselect10() { + musterr("{1:d select, other {}") + } + + #[test] + fn plural_simple() { + same("{, plural, other { haha } }", ~[Argument(Argument{ + position: ArgumentNext, + format: fmtdflt(), + method: Some(~Plural(None, ~[], ~[String(" haha ")])) + })]); + same("{:, plural, other { haha } }", ~[Argument(Argument{ + position: ArgumentNext, + format: fmtdflt(), + method: Some(~Plural(None, ~[], ~[String(" haha ")])) + })]); + same("{, plural, offset:1 =2{2} =3{3} many{yes} other{haha} }", + ~[Argument(Argument{ + position: ArgumentNext, + format: fmtdflt(), + method: Some(~Plural(Some(1), ~[ + PluralArm{ selector: Right(2), result: ~[String("2")] }, + PluralArm{ selector: Right(3), result: ~[String("3")] }, + PluralArm{ selector: Left(Many), result: ~[String("yes")] } + ], ~[String("haha")])) + })]); + } +} diff --git a/src/libstd/fmt/rt.rs b/src/libstd/fmt/rt.rs new file mode 100644 index 00000000000..6feb1d7a848 --- /dev/null +++ b/src/libstd/fmt/rt.rs @@ -0,0 +1,62 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! This is an internal module used by the ifmt! runtime. These structures are +//! emitted to static arrays to precompile format strings ahead of time. +//! +//! These definitions are similar to their `ct` equivalents, but differ in that +//! these can be statically allocated and are slightly optimized for the runtime + +#[allow(missing_doc)]; +#[doc(hidden)]; + +use either::Either; +use fmt::parse; +use option::Option; + +pub enum Piece<'self> { + String(&'self str), + // FIXME(#8259): this shouldn't require the unit-value here + CurrentArgument(()), + Argument(Argument<'self>), +} + +pub struct Argument<'self> { + position: Position, + format: FormatSpec, + method: Option<&'self Method<'self>> +} + +pub struct FormatSpec { + fill: char, + alignleft: bool, + flags: uint, + precision: parse::Count, + width: parse::Count, +} + +pub enum Position { + ArgumentNext, ArgumentIs(uint) +} + +pub enum Method<'self> { + Plural(Option<uint>, &'self [PluralArm<'self>], &'self [Piece<'self>]), + Select(&'self [SelectArm<'self>], &'self [Piece<'self>]), +} + +pub struct PluralArm<'self> { + selector: Either<parse::PluralKeyword, uint>, + result: &'self [Piece<'self>], +} + +pub struct SelectArm<'self> { + selector: &'self str, + result: &'self [Piece<'self>], +} diff --git a/src/libstd/rt/io/mem.rs b/src/libstd/rt/io/mem.rs index c93945a6a9a..277897e5d2e 100644 --- a/src/libstd/rt/io/mem.rs +++ b/src/libstd/rt/io/mem.rs @@ -26,7 +26,7 @@ pub struct MemWriter { } impl MemWriter { - pub fn new() -> MemWriter { MemWriter { buf: ~[] } } + pub fn new() -> MemWriter { MemWriter { buf: vec::with_capacity(128) } } } impl Writer for MemWriter { diff --git a/src/libstd/std.rs b/src/libstd/std.rs index 568709c89da..7000b56069d 100644 --- a/src/libstd/std.rs +++ b/src/libstd/std.rs @@ -177,6 +177,7 @@ pub mod rand; pub mod run; pub mod sys; pub mod cast; +pub mod fmt; pub mod repr; pub mod cleanup; pub mod reflect; @@ -216,4 +217,6 @@ mod std { pub use unstable; pub use str; pub use os; + pub use fmt; + pub use to_bytes; } |
