auto merge of #5463 : alexcrichton/rust/faster-fmt, r=graydon

This is a minor step towards #3571, although I'm sure there's still more work to be done. Previously, `fmt!` collected a bunch of strings in a vector and then called `str::concat`. This changes the behavior by maintaining only one buffer and appending directly into that buffer. This avoids doubly-allocating memory, and it has the added bonus of reducing some allocations in `core::unstable::extfmt` One of the unfortunate side effects of this is that the `rt` module in `extfmt.rs` had to be duplicated to avoid `stage0` errors. Dealing with the change in conversion functions may require a bit of a dance when a snapshot happens, but I think it's doable. If the second speedup commit isn't deemed necessary, I got about a 15% speedup with just the first patch which doesn't require any modification of `extfmt.rs`, so no snapshot weirdness. Here's some other things I ran into when looking at `fmt!`: * I don't think that #2249 is relevant any more except for maybe removing one of `%i` or `%d` * I'm not sure what was in mind for using traits with #3571, but I thought that formatters like `%u` could invoke the `to_uint()` method on the `NumCast` trait, but I ran into some problems like those in #5462 I'm having trouble thinking of other wins for `fmt!`, but if there's some suggestions I'd be more than willing to look into if they'd work out or not.
author: bors <bors@rust-lang.org> 2013-03-22 10:18:53 -0700
committer: bors <bors@rust-lang.org> 2013-03-22 10:18:53 -0700
commit: f011f928dd69a5b770b348aea2c547431c34e11a (patch)
tree: b389e0437a91eb9917c5fe7166ebb9c46692f62a
parent: 1616ffd0c2627502b1015b6388480ed7429ef042 (diff)
parent: e93654c96d0288e6f2f00075d95dd4958b4cb4dc (diff)
download: rust-f011f928dd69a5b770b348aea2c547431c34e11a.tar.gz
rust-f011f928dd69a5b770b348aea2c547431c34e11a.zip
6 files changed, 281 insertions, 61 deletions
diff --git a/src/libcore/unstable/extfmt.rs b/src/libcore/unstable/extfmt.rs
index 28f810c3a28..7527a6afb55 100644
--- a/src/libcore/unstable/extfmt.rs
+++ b/src/libcore/unstable/extfmt.rs
@@ -470,6 +470,215 @@ pub mod ct {
 // decisions made a runtime. If it proves worthwhile then some of these
 // conditions can be evaluated at compile-time. For now though it's cleaner to
 // implement it this way, I think.
+#[cfg(stage1)]
+#[cfg(stage2)]
+#[cfg(stage3)]
+#[doc(hidden)]
+pub mod rt {
+    use float;
+    use str;
+    use sys;
+    use int;
+    use uint;
+    use vec;
+    use option::{Some, None, Option};
+
+    pub const flag_none : u32 = 0u32;
+    pub const flag_left_justify   : u32 = 0b00000000000001u32;
+    pub const flag_left_zero_pad  : u32 = 0b00000000000010u32;
+    pub const flag_space_for_sign : u32 = 0b00000000000100u32;
+    pub const flag_sign_always    : u32 = 0b00000000001000u32;
+    pub const flag_alternate      : u32 = 0b00000000010000u32;
+
+    pub enum Count { CountIs(uint), CountImplied, }
+
+    pub enum Ty { TyDefault, TyBits, TyHexUpper, TyHexLower, TyOctal, }
+
+    pub struct Conv {
+        flags: u32,
+        width: Count,
+        precision: Count,
+        ty: Ty,
+    }
+
+    pub pure fn conv_int(cv: Conv, i: int, buf: &mut ~str) {
+        let radix = 10;
+        let prec = get_int_precision(cv);
+        let mut s : ~str = uint_to_str_prec(int::abs(i) as uint, radix, prec);
+
+        let head = if i >= 0 {
+            if have_flag(cv.flags, flag_sign_always) {
+                Some('+')
+            } else if have_flag(cv.flags, flag_space_for_sign) {
+                Some(' ')
+            } else {
+                None
+            }
+        } else { Some('-') };
+        unsafe { pad(cv, s, head, PadSigned, buf) };
+    }
+    pub pure fn conv_uint(cv: Conv, u: uint, buf: &mut ~str) {
+        let prec = get_int_precision(cv);
+        let mut rs =
+            match cv.ty {
+              TyDefault => uint_to_str_prec(u, 10, prec),
+              TyHexLower => uint_to_str_prec(u, 16, prec),
+              TyHexUpper => str::to_upper(uint_to_str_prec(u, 16, prec)),
+              TyBits => uint_to_str_prec(u, 2, prec),
+              TyOctal => uint_to_str_prec(u, 8, prec)
+            };
+        unsafe { pad(cv, rs, None, PadUnsigned, buf) };
+    }
+    pub pure fn conv_bool(cv: Conv, b: bool, buf: &mut ~str) {
+        let s = if b { "true" } else { "false" };
+        // run the boolean conversion through the string conversion logic,
+        // giving it the same rules for precision, etc.
+        conv_str(cv, s, buf);
+    }
+    pub pure fn conv_char(cv: Conv, c: char, buf: &mut ~str) {
+        unsafe { pad(cv, "", Some(c), PadNozero, buf) };
+    }
+    pub pure fn conv_str(cv: Conv, s: &str, buf: &mut ~str) {
+        // For strings, precision is the maximum characters
+        // displayed
+        let mut unpadded = match cv.precision {
+          CountImplied => s,
+          CountIs(max) => if (max as uint) < str::char_len(s) {
+            str::slice(s, 0, max as uint)
+          } else {
+            s
+          }
+        };
+        unsafe { pad(cv, unpadded, None, PadNozero, buf) };
+    }
+    pub pure fn conv_float(cv: Conv, f: float, buf: &mut ~str) {
+        let (to_str, digits) = match cv.precision {
+              CountIs(c) => (float::to_str_exact, c as uint),
+              CountImplied => (float::to_str_digits, 6u)
+        };
+        let mut s = unsafe { to_str(f, digits) };
+        let head = if 0.0 <= f {
+            if have_flag(cv.flags, flag_sign_always) {
+                Some('+')
+            } else if have_flag(cv.flags, flag_space_for_sign) {
+                Some(' ')
+            } else {
+                None
+            }
+        } else { None };
+        unsafe { pad(cv, s, head, PadFloat, buf) };
+    }
+    pub pure fn conv_poly<T>(cv: Conv, v: &T, buf: &mut ~str) {
+        let s = sys::log_str(v);
+        conv_str(cv, s, buf);
+    }
+
+    // Convert a uint to string with a minimum number of digits.  If precision
+    // is 0 and num is 0 then the result is the empty string. Could move this
+    // to uint: but it doesn't seem all that useful.
+    pub pure fn uint_to_str_prec(num: uint, radix: uint,
+                                 prec: uint) -> ~str {
+        return if prec == 0u && num == 0u {
+                ~""
+            } else {
+                let s = uint::to_str_radix(num, radix);
+                let len = str::char_len(s);
+                if len < prec {
+                    let diff = prec - len;
+                    let pad = str::from_chars(vec::from_elem(diff, '0'));
+                    pad + s
+                } else { s }
+            };
+    }
+    pub pure fn get_int_precision(cv: Conv) -> uint {
+        return match cv.precision {
+              CountIs(c) => c as uint,
+              CountImplied => 1u
+            };
+    }
+
+    #[deriving(Eq)]
+    pub enum PadMode { PadSigned, PadUnsigned, PadNozero, PadFloat }
+
+    pub fn pad(cv: Conv, mut s: &str, head: Option<char>, mode: PadMode,
+               buf: &mut ~str) {
+        let headsize = match head { Some(_) => 1, _ => 0 };
+        let uwidth : uint = match cv.width {
+            CountImplied => {
+                for head.each |&c| {
+                    buf.push_char(c);
+                }
+                return buf.push_str(s);
+            }
+            CountIs(width) => { width as uint }
+        };
+        let strlen = str::char_len(s) + headsize;
+        if uwidth <= strlen {
+            for head.each |&c| {
+                buf.push_char(c);
+            }
+            return buf.push_str(s);
+        }
+        let mut padchar = ' ';
+        let diff = uwidth - strlen;
+        if have_flag(cv.flags, flag_left_justify) {
+            for head.each |&c| {
+                buf.push_char(c);
+            }
+            buf.push_str(s);
+            for diff.times {
+                buf.push_char(padchar);
+            }
+            return;
+        }
+        let (might_zero_pad, signed) = match mode {
+          PadNozero   => (false, true),
+          PadSigned   => (true, true),
+          PadFloat    => (true, true),
+          PadUnsigned => (true, false)
+        };
+        pure fn have_precision(cv: Conv) -> bool {
+            return match cv.precision { CountImplied => false, _ => true };
+        }
+        let zero_padding = {
+            if might_zero_pad && have_flag(cv.flags, flag_left_zero_pad) &&
+                (!have_precision(cv) || mode == PadFloat) {
+                padchar = '0';
+                true
+            } else {
+                false
+            }
+        };
+        let padstr = str::from_chars(vec::from_elem(diff, padchar));
+        // This is completely heinous. If we have a signed value then
+        // potentially rip apart the intermediate result and insert some
+        // zeros. It may make sense to convert zero padding to a precision
+        // instead.
+
+        if signed && zero_padding {
+            for head.each |&head| {
+                if head == '+' || head == '-' || head == ' ' {
+                    buf.push_char(head);
+                    buf.push_str(padstr);
+                    buf.push_str(s);
+                    return;
+                }
+            }
+        }
+        buf.push_str(padstr);
+        for head.each |&c| {
+            buf.push_char(c);
+        }
+        buf.push_str(s);
+    }
+    #[inline(always)]
+    pub pure fn have_flag(flags: u32, f: u32) -> bool {
+        flags & f != 0
+    }
+}
+
+// XXX: remove after a snapshot of the above changes have gone in
+#[cfg(stage0)]
 #[doc(hidden)]
 pub mod rt {
     use float;
diff --git a/src/librustc/middle/typeck/check/regionck.rs b/src/librustc/middle/typeck/check/regionck.rs
index 2007742b43a..6682082bd18 100644
--- a/src/librustc/middle/typeck/check/regionck.rs
+++ b/src/librustc/middle/typeck/check/regionck.rs
@@ -32,7 +32,6 @@ use core::prelude::*;
 use middle::freevars::get_freevars;
 use middle::pat_util::{pat_bindings, pat_is_binding};
 use middle::ty::{encl_region, re_scope};
-use middle::ty::{vstore_box, vstore_fixed, vstore_slice};
 use middle::ty;
 use middle::typeck::check::FnCtxt;
 use middle::typeck::check::lookup_def;
diff --git a/src/librustc/middle/typeck/check/regionmanip.rs b/src/librustc/middle/typeck/check/regionmanip.rs
index c78a91b95e4..abbefd1f7e6 100644
--- a/src/librustc/middle/typeck/check/regionmanip.rs
+++ b/src/librustc/middle/typeck/check/regionmanip.rs
@@ -20,8 +20,6 @@ use util::ppaux::region_to_str;
 use util::ppaux;
 
 use std::list::Cons;
-use syntax::ast;
-use syntax::codemap;
 
 // Helper functions related to manipulating region types.
 
diff --git a/src/librustc/middle/typeck/mod.rs b/src/librustc/middle/typeck/mod.rs
index 1787c733ed5..7724b43b50f 100644
--- a/src/librustc/middle/typeck/mod.rs
+++ b/src/librustc/middle/typeck/mod.rs
@@ -51,7 +51,6 @@ independently:
 use core::prelude::*;
 
 use middle::resolve;
-use middle::ty::{ty_param_substs_and_ty, vstore_uniq};
 use middle::ty;
 use util::common::time;
 use util::ppaux;
diff --git a/src/libsyntax/ext/build.rs b/src/libsyntax/ext/build.rs
index 18c7cd3f861..c2f4cbf3db2 100644
--- a/src/libsyntax/ext/build.rs
+++ b/src/libsyntax/ext/build.rs
@@ -108,6 +108,9 @@ pub fn mk_access(cx: @ext_ctxt, sp: span, +p: ~[ast::ident], m: ast::ident)
 pub fn mk_addr_of(cx: @ext_ctxt, sp: span, e: @ast::expr) -> @ast::expr {
     return mk_expr(cx, sp, ast::expr_addr_of(ast::m_imm, e));
 }
+pub fn mk_mut_addr_of(cx: @ext_ctxt, sp: span, e: @ast::expr) -> @ast::expr {
+    return mk_expr(cx, sp, ast::expr_addr_of(ast::m_mutbl, e));
+}
 pub fn mk_call_(cx: @ext_ctxt, sp: span, fn_expr: @ast::expr,
                 +args: ~[@ast::expr]) -> @ast::expr {
     mk_expr(cx, sp, ast::expr_call(fn_expr, args, ast::NoSugar))
diff --git a/src/libsyntax/ext/fmt.rs b/src/libsyntax/ext/fmt.rs
index 9973c9558c9..3ebe844950a 100644
--- a/src/libsyntax/ext/fmt.rs
+++ b/src/libsyntax/ext/fmt.rs
@@ -139,19 +139,17 @@ fn pieces_to_expr(cx: @ext_ctxt, sp: span,
         make_conv_struct(cx, sp, rt_conv_flags, rt_conv_width,
                          rt_conv_precision, rt_conv_ty)
     }
-    fn make_conv_call(cx: @ext_ctxt, sp: span, conv_type: ~str, cnv: &Conv,
-                      arg: @ast::expr) -> @ast::expr {
+    fn make_conv_call(cx: @ext_ctxt, sp: span, conv_type: &str, cnv: &Conv,
+                      arg: @ast::expr, buf: @ast::expr) -> @ast::expr {
         let fname = ~"conv_" + conv_type;
         let path = make_path_vec(cx, @fname);
         let cnv_expr = make_rt_conv_expr(cx, sp, cnv);
-        let args = ~[cnv_expr, arg];
+        let args = ~[cnv_expr, arg, buf];
         return mk_call_global(cx, arg.span, path, args);
     }
 
-    fn make_new_conv(cx: @ext_ctxt, sp: span, cnv: &Conv, arg: @ast::expr) ->
-       @ast::expr {
-        // FIXME: Move validation code into core::extfmt (Issue #2249)
-
+    fn make_new_conv(cx: @ext_ctxt, sp: span, cnv: &Conv,
+                     arg: @ast::expr, buf: @ast::expr) -> @ast::expr {
         fn is_signed_type(cnv: &Conv) -> bool {
             match cnv.ty {
               TyInt(s) => match s {
@@ -198,29 +196,20 @@ fn pieces_to_expr(cx: @ext_ctxt, sp: span,
           CountIs(_) => (),
           _ => cx.span_unimpl(sp, unsupported)
         }
-        match cnv.ty {
-          TyStr => return make_conv_call(cx, arg.span, ~"str", cnv, arg),
-          TyInt(sign) => match sign {
-            Signed => return make_conv_call(cx, arg.span, ~"int", cnv, arg),
-            Unsigned => {
-                return make_conv_call(cx, arg.span, ~"uint", cnv, arg)
-            }
-          },
-          TyBool => return make_conv_call(cx, arg.span, ~"bool", cnv, arg),
-          TyChar => return make_conv_call(cx, arg.span, ~"char", cnv, arg),
-          TyHex(_) => {
-            return make_conv_call(cx, arg.span, ~"uint", cnv, arg);
-          }
-          TyBits => return make_conv_call(cx, arg.span, ~"uint", cnv, arg),
-          TyOctal => return make_conv_call(cx, arg.span, ~"uint", cnv, arg),
-          TyFloat => {
-            return make_conv_call(cx, arg.span, ~"float", cnv, arg);
-          }
-          TyPoly => return make_conv_call(cx, arg.span, ~"poly", cnv,
-                       mk_addr_of(cx, sp, arg))
-        }
+        let (name, actual_arg) = match cnv.ty {
+            TyStr => ("str", arg),
+            TyInt(Signed) => ("int", arg),
+            TyBool => ("bool", arg),
+            TyChar => ("char", arg),
+            TyBits | TyOctal | TyHex(_) | TyInt(Unsigned) => ("uint", arg),
+            TyFloat => ("float", arg),
+            TyPoly => ("poly", mk_addr_of(cx, sp, arg))
+        };
+        return make_conv_call(cx, arg.span, name, cnv, actual_arg,
+                              mk_mut_addr_of(cx, arg.span, buf));
     }
     fn log_conv(c: &Conv) {
+        debug!("Building conversion:");
         match c.param {
           Some(p) => { debug!("param: %s", p.to_str()); }
           _ => debug!("param: none")
@@ -268,49 +257,72 @@ fn pieces_to_expr(cx: @ext_ctxt, sp: span,
           TyPoly => debug!("type: poly")
         }
     }
+
     let fmt_sp = args[0].span;
     let mut n = 0u;
-    let mut piece_exprs = ~[];
     let nargs = args.len();
-    for pieces.each |pc| {
-        match *pc {
-          PieceString(ref s) => {
-            piece_exprs.push(mk_uniq_str(cx, fmt_sp, copy *s))
-          }
-          PieceConv(ref conv) => {
-            n += 1u;
-            if n >= nargs {
-                cx.span_fatal(sp,
-                              ~"not enough arguments to fmt! " +
+
+    /* 'ident' is the local buffer building up the result of fmt! */
+    let ident = cx.parse_sess().interner.intern(@~"__fmtbuf");
+    let buf = || mk_path(cx, fmt_sp, ~[ident]);
+    let str_ident = cx.parse_sess().interner.intern(@~"str");
+    let push_ident = cx.parse_sess().interner.intern(@~"push_str");
+    let mut stms = ~[];
+
+    /* Translate each piece (portion of the fmt expression) by invoking the
+       corresponding function in core::unstable::extfmt. Each function takes a
+       buffer to insert data into along with the data being formatted. */
+    do vec::consume(pieces) |i, pc| {
+        match pc {
+            /* Raw strings get appended via str::push_str */
+            PieceString(s) => {
+                let portion = mk_uniq_str(cx, fmt_sp, s);
+
+                /* If this is the first portion, then initialize the local
+                   buffer with it directly */
+                if i == 0 {
+                    stms.push(mk_local(cx, fmt_sp, true, ident, portion));
+                } else {
+                    let args = ~[mk_mut_addr_of(cx, fmt_sp, buf()), portion];
+                    let call = mk_call_global(cx,
+                                              fmt_sp,
+                                              ~[str_ident, push_ident],
+                                              args);
+                    stms.push(mk_stmt(cx, fmt_sp, call));
+                }
+            }
+
+            /* Invoke the correct conv function in extfmt */
+            PieceConv(ref conv) => {
+                n += 1u;
+                if n >= nargs {
+                    cx.span_fatal(sp,
+                                  ~"not enough arguments to fmt! " +
                                   ~"for the given format string");
+                }
+
+                log_conv(conv);
+                /* If the first portion is a conversion, then the local buffer
+                   must be initialized as an empty string */
+                if i == 0 {
+                    stms.push(mk_local(cx, fmt_sp, true, ident,
+                                       mk_uniq_str(cx, fmt_sp, ~"")));
+                }
+                stms.push(mk_stmt(cx, fmt_sp,
+                                  make_new_conv(cx, fmt_sp, conv,
+                                                args[n], buf())));
             }
-            debug!("Building conversion:");
-            log_conv(conv);
-            let arg_expr = args[n];
-            let c_expr = make_new_conv(
-                cx,
-                fmt_sp,
-                conv,
-                arg_expr
-            );
-            piece_exprs.push(c_expr);
-          }
         }
     }
-    let expected_nargs = n + 1u; // n conversions + the fmt string
 
+    let expected_nargs = n + 1u; // n conversions + the fmt string
     if expected_nargs < nargs {
         cx.span_fatal
             (sp, fmt!("too many arguments to fmt!. found %u, expected %u",
                            nargs, expected_nargs));
     }
 
-    let arg_vec = mk_fixed_vec_e(cx, fmt_sp, piece_exprs);
-    return mk_call_global(cx,
-                          fmt_sp,
-                          ~[cx.parse_sess().interner.intern(@~"str"),
-                            cx.parse_sess().interner.intern(@~"concat")],
-                          ~[arg_vec]);
+    return mk_block(cx, fmt_sp, ~[], stms, Some(buf()));
 }
 //
 // Local Variables:
author	bors <bors@rust-lang.org>	2013-03-22 10:18:53 -0700
committer	bors <bors@rust-lang.org>	2013-03-22 10:18:53 -0700
commit	f011f928dd69a5b770b348aea2c547431c34e11a (patch)
tree	b389e0437a91eb9917c5fe7166ebb9c46692f62a
parent	1616ffd0c2627502b1015b6388480ed7429ef042 (diff)
parent	e93654c96d0288e6f2f00075d95dd4958b4cb4dc (diff)
download	rust-f011f928dd69a5b770b348aea2c547431c34e11a.tar.gz rust-f011f928dd69a5b770b348aea2c547431c34e11a.zip