auto merge of #5463 : alexcrichton/rust/faster-fmt, r=graydon

bors · bors · commit f011f928dd69 · 2013-03-22T10:18:53.000-07:00
This is a minor step towards #3571, although I'm sure there's still more work to be done. Previously, `fmt!` collected a bunch of strings in a vector and then called `str::concat`. This changes the behavior by maintaining only one buffer and appending directly into that buffer. This avoids doubly-allocating memory, and it has the added bonus of reducing some allocations in `core::unstable::extfmt` One of the unfortunate side effects of this is that the `rt` module in `extfmt.rs` had to be duplicated to avoid `stage0` errors. Dealing with the change in conversion functions may require a bit of a dance when a snapshot happens, but I think it's doable. If the second speedup commit isn't deemed necessary, I got about a 15% speedup with just the first patch which doesn't require any modification of `extfmt.rs`, so no snapshot weirdness. Here's some other things I ran into when looking at `fmt!`: * I don't think that #2249 is relevant any more except for maybe removing one of `%i` or `%d` * I'm not sure what was in mind for using traits with #3571, but I thought that formatters like `%u` could invoke the `to_uint()` method on the `NumCast` trait, but I ran into some problems like those in #5462 I'm having trouble thinking of other wins for `fmt!`, but if there's some suggestions I'd be more than willing to look into if they'd work out or not.
diff --git a/src/libcore/unstable/extfmt.rs b/src/libcore/unstable/extfmt.rs
@@ -470,6 +470,215 @@ pub mod ct {
 // decisions made a runtime. If it proves worthwhile then some of these
 // conditions can be evaluated at compile-time. For now though it's cleaner to
 // implement it this way, I think.
+#[cfg(stage1)]
+#[cfg(stage2)]
+#[cfg(stage3)]
+#[doc(hidden)]
+pub mod rt {
+    use float;
+    use str;
+    use sys;
+    use int;
+    use uint;
+    use vec;
+    use option::{Some, None, Option};
+
+    pub const flag_none : u32 = 0u32;
+    pub const flag_left_justify   : u32 = 0b00000000000001u32;
+    pub const flag_left_zero_pad  : u32 = 0b00000000000010u32;
+    pub const flag_space_for_sign : u32 = 0b00000000000100u32;
+    pub const flag_sign_always    : u32 = 0b00000000001000u32;
+    pub const flag_alternate      : u32 = 0b00000000010000u32;
+
+    pub enum Count { CountIs(uint), CountImplied, }
+
+    pub enum Ty { TyDefault, TyBits, TyHexUpper, TyHexLower, TyOctal, }
+
+    pub struct Conv {
+        flags: u32,
+        width: Count,
+        precision: Count,
+        ty: Ty,
+    }
+
+    pub pure fn conv_int(cv: Conv, i: int, buf: &mut ~str) {
+        let radix = 10;
+        let prec = get_int_precision(cv);
+        let mut s : ~str = uint_to_str_prec(int::abs(i) as uint, radix, prec);
+
+        let head = if i >= 0 {
+            if have_flag(cv.flags, flag_sign_always) {
+                Some('+')
+            } else if have_flag(cv.flags, flag_space_for_sign) {
+                Some(' ')
+            } else {
+                None
+            }
+        } else { Some('-') };
+        unsafe { pad(cv, s, head, PadSigned, buf) };
+    }
+    pub pure fn conv_uint(cv: Conv, u: uint, buf: &mut ~str) {
+        let prec = get_int_precision(cv);
+        let mut rs =
+            match cv.ty {
+              TyDefault => uint_to_str_prec(u, 10, prec),
+              TyHexLower => uint_to_str_prec(u, 16, prec),
+              TyHexUpper => str::to_upper(uint_to_str_prec(u, 16, prec)),
+              TyBits => uint_to_str_prec(u, 2, prec),
+              TyOctal => uint_to_str_prec(u, 8, prec)
+            };
+        unsafe { pad(cv, rs, None, PadUnsigned, buf) };
+    }
+    pub pure fn conv_bool(cv: Conv, b: bool, buf: &mut ~str) {
+        let s = if b { "true" } else { "false" };
+        // run the boolean conversion through the string conversion logic,
+        // giving it the same rules for precision, etc.
+        conv_str(cv, s, buf);
+    }
+    pub pure fn conv_char(cv: Conv, c: char, buf: &mut ~str) {
+        unsafe { pad(cv, "", Some(c), PadNozero, buf) };
+    }
+    pub pure fn conv_str(cv: Conv, s: &str, buf: &mut ~str) {
+        // For strings, precision is the maximum characters
+        // displayed
+        let mut unpadded = match cv.precision {
+          CountImplied => s,
+          CountIs(max) => if (max as uint) < str::char_len(s) {
+            str::slice(s, 0, max as uint)
+          } else {
+            s
+          }
+        };
+        unsafe { pad(cv, unpadded, None, PadNozero, buf) };
+    }
+    pub pure fn conv_float(cv: Conv, f: float, buf: &mut ~str) {
+        let (to_str, digits) = match cv.precision {
+              CountIs(c) => (float::to_str_exact, c as uint),
+              CountImplied => (float::to_str_digits, 6u)
+        };
+        let mut s = unsafe { to_str(f, digits) };
+        let head = if 0.0 <= f {
+            if have_flag(cv.flags, flag_sign_always) {
+                Some('+')
+            } else if have_flag(cv.flags, flag_space_for_sign) {
+                Some(' ')
+            } else {
+                None
+            }
+        } else { None };
+        unsafe { pad(cv, s, head, PadFloat, buf) };
+    }
+    pub pure fn conv_poly<T>(cv: Conv, v: &T, buf: &mut ~str) {
+        let s = sys::log_str(v);
+        conv_str(cv, s, buf);
+    }
+
+    // Convert a uint to string with a minimum number of digits.  If precision
+    // is 0 and num is 0 then the result is the empty string. Could move this
+    // to uint: but it doesn't seem all that useful.
+    pub pure fn uint_to_str_prec(num: uint, radix: uint,
+                                 prec: uint) -> ~str {
+        return if prec == 0u && num == 0u {
+                ~""
+            } else {
+                let s = uint::to_str_radix(num, radix);
+                let len = str::char_len(s);
+                if len < prec {
+                    let diff = prec - len;
+                    let pad = str::from_chars(vec::from_elem(diff, '0'));
+                    pad + s
+                } else { s }
+            };
+    }
+    pub pure fn get_int_precision(cv: Conv) -> uint {
+        return match cv.precision {
+              CountIs(c) => c as uint,
+              CountImplied => 1u
+            };
+    }
+
+    #[deriving(Eq)]
+    pub enum PadMode { PadSigned, PadUnsigned, PadNozero, PadFloat }
+
+    pub fn pad(cv: Conv, mut s: &str, head: Option<char>, mode: PadMode,
+               buf: &mut ~str) {
+        let headsize = match head { Some(_) => 1, _ => 0 };
+        let uwidth : uint = match cv.width {
+            CountImplied => {
+                for head.each |&c| {
+                    buf.push_char(c);
+                }
+                return buf.push_str(s);
+            }
+            CountIs(width) => { width as uint }
+        };
+        let strlen = str::char_len(s) + headsize;
+        if uwidth <= strlen {
+            for head.each |&c| {
+                buf.push_char(c);
+            }
+            return buf.push_str(s);
+        }
+        let mut padchar = ' ';
+        let diff = uwidth - strlen;
+        if have_flag(cv.flags, flag_left_justify) {
+            for head.each |&c| {
+                buf.push_char(c);
+            }
+            buf.push_str(s);
+            for diff.times {
+                buf.push_char(padchar);
+            }
+            return;
+        }
+        let (might_zero_pad, signed) = match mode {
+          PadNozero   => (false, true),
+          PadSigned   => (true, true),
+          PadFloat    => (true, true),
+          PadUnsigned => (true, false)
+        };
+        pure fn have_precision(cv: Conv) -> bool {
+            return match cv.precision { CountImplied => false, _ => true };
+        }
+        let zero_padding = {
+            if might_zero_pad && have_flag(cv.flags, flag_left_zero_pad) &&
+                (!have_precision(cv) || mode == PadFloat) {
+                padchar = '0';
+                true
+            } else {
+                false
+            }
+        };
+        let padstr = str::from_chars(vec::from_elem(diff, padchar));
+        // This is completely heinous. If we have a signed value then
+        // potentially rip apart the intermediate result and insert some
+        // zeros. It may make sense to convert zero padding to a precision
+        // instead.
+
+        if signed && zero_padding {
+            for head.each |&head| {
+                if head == '+' || head == '-' || head == ' ' {
+                    buf.push_char(head);
+                    buf.push_str(padstr);
+                    buf.push_str(s);
+                    return;
+                }
+            }
+        }
+        buf.push_str(padstr);
+        for head.each |&c| {
+            buf.push_char(c);
+        }
+        buf.push_str(s);
+    }
+    #[inline(always)]
+    pub pure fn have_flag(flags: u32, f: u32) -> bool {
+        flags & f != 0
+    }
+}
+
+// XXX: remove after a snapshot of the above changes have gone in
+#[cfg(stage0)]
 #[doc(hidden)]
 pub mod rt {
     use float;
diff --git a/src/librustc/middle/typeck/check/regionck.rs b/src/librustc/middle/typeck/check/regionck.rs
@@ -32,7 +32,6 @@ use core::prelude::*;
 use middle::freevars::get_freevars;
 use middle::pat_util::{pat_bindings, pat_is_binding};
 use middle::ty::{encl_region, re_scope};
-use middle::ty::{vstore_box, vstore_fixed, vstore_slice};
 use middle::ty;
 use middle::typeck::check::FnCtxt;
 use middle::typeck::check::lookup_def;
diff --git a/src/librustc/middle/typeck/check/regionmanip.rs b/src/librustc/middle/typeck/check/regionmanip.rs
@@ -20,8 +20,6 @@ use util::ppaux::region_to_str;
 use util::ppaux;
 
 use std::list::Cons;
-use syntax::ast;
-use syntax::codemap;
 
 // Helper functions related to manipulating region types.
 
diff --git a/src/librustc/middle/typeck/mod.rs b/src/librustc/middle/typeck/mod.rs
@@ -51,7 +51,6 @@ independently:
 use core::prelude::*;
 
 use middle::resolve;
-use middle::ty::{ty_param_substs_and_ty, vstore_uniq};
 use middle::ty;
 use util::common::time;
 use util::ppaux;
diff --git a/src/libsyntax/ext/build.rs b/src/libsyntax/ext/build.rs
@@ -108,6 +108,9 @@ pub fn mk_access(cx: @ext_ctxt, sp: span, +p: ~[ast::ident], m: ast::ident)
 pub fn mk_addr_of(cx: @ext_ctxt, sp: span, e: @ast::expr) -> @ast::expr {
     return mk_expr(cx, sp, ast::expr_addr_of(ast::m_imm, e));
 }
+pub fn mk_mut_addr_of(cx: @ext_ctxt, sp: span, e: @ast::expr) -> @ast::expr {
+    return mk_expr(cx, sp, ast::expr_addr_of(ast::m_mutbl, e));
+}
 pub fn mk_call_(cx: @ext_ctxt, sp: span, fn_expr: @ast::expr,
                 +args: ~[@ast::expr]) -> @ast::expr {
     mk_expr(cx, sp, ast::expr_call(fn_expr, args, ast::NoSugar))
diff --git a/src/libsyntax/ext/fmt.rs b/src/libsyntax/ext/fmt.rs

-Original file line number
+Diff line change
         make_conv_struct(cx, sp, rt_conv_flags, rt_conv_width,
                          rt_conv_precision, rt_conv_ty)
+    }
 -    fn make_conv_call(cx: @ext_ctxt, sp: span, conv_type: ~str, cnv: &Conv,
 -                      arg: @ast::expr) -> @ast::expr {
 +    fn make_conv_call(cx: @ext_ctxt, sp: span, conv_type: &str, cnv: &Conv,
 +                      arg: @ast::expr, buf: @ast::expr) -> @ast::expr {
         let fname = ~"conv_" + conv_type;
         let path = make_path_vec(cx, @fname);
         let cnv_expr = make_rt_conv_expr(cx, sp, cnv);
 -        let args = ~[cnv_expr, arg];
 +        let args = ~[cnv_expr, arg, buf];
         return mk_call_global(cx, arg.span, path, args);
+    }
 -    fn make_new_conv(cx: @ext_ctxt, sp: span, cnv: &Conv, arg: @ast::expr) ->
 -       @ast::expr {
 -        // FIXME: Move validation code into core::extfmt (Issue #2249)
+-
 +    fn make_new_conv(cx: @ext_ctxt, sp: span, cnv: &Conv,
 +                     arg: @ast::expr, buf: @ast::expr) -> @ast::expr {
         fn is_signed_type(cnv: &Conv) -> bool {
             match cnv.ty {
               TyInt(s) => match s {
           CountIs(_) => (),
           _ => cx.span_unimpl(sp, unsupported)
+        }
 -        match cnv.ty {
 -          TyStr => return make_conv_call(cx, arg.span, ~"str", cnv, arg),
 -          TyInt(sign) => match sign {
 -            Signed => return make_conv_call(cx, arg.span, ~"int", cnv, arg),
 -            Unsigned => {
 -                return make_conv_call(cx, arg.span, ~"uint", cnv, arg)
 -            }
 -          },
 -          TyBool => return make_conv_call(cx, arg.span, ~"bool", cnv, arg),
 -          TyChar => return make_conv_call(cx, arg.span, ~"char", cnv, arg),
 -          TyHex(_) => {
 -            return make_conv_call(cx, arg.span, ~"uint", cnv, arg);
 -          }
 -          TyBits => return make_conv_call(cx, arg.span, ~"uint", cnv, arg),
 -          TyOctal => return make_conv_call(cx, arg.span, ~"uint", cnv, arg),
 -          TyFloat => {
 -            return make_conv_call(cx, arg.span, ~"float", cnv, arg);
 -          }
 -          TyPoly => return make_conv_call(cx, arg.span, ~"poly", cnv,
 -                       mk_addr_of(cx, sp, arg))
 -        }
 +        let (name, actual_arg) = match cnv.ty {
 +            TyStr => ("str", arg),
 +            TyInt(Signed) => ("int", arg),
 +            TyBool => ("bool", arg),
 +            TyChar => ("char", arg),
 +            TyBits | TyOctal | TyHex(_) | TyInt(Unsigned) => ("uint", arg),
 +            TyFloat => ("float", arg),
 +            TyPoly => ("poly", mk_addr_of(cx, sp, arg))
 +        };
 +        return make_conv_call(cx, arg.span, name, cnv, actual_arg,
 +                              mk_mut_addr_of(cx, arg.span, buf));
+    }
     fn log_conv(c: &Conv) {
 +        debug!("Building conversion:");
         match c.param {
           Some(p) => { debug!("param: %s", p.to_str()); }
           _ => debug!("param: none")
           TyPoly => debug!("type: poly")
+        }
+    }
++
     let fmt_sp = args[0].span;
     let mut n = 0u;
 -    let mut piece_exprs = ~[];
     let nargs = args.len();
 -    for pieces.each |pc| {
 -        match *pc {
 -          PieceString(ref s) => {
 -            piece_exprs.push(mk_uniq_str(cx, fmt_sp, copy *s))
 -          }
 -          PieceConv(ref conv) => {
 -            n += 1u;
 -            if n >= nargs {
 -                cx.span_fatal(sp,
 -                              ~"not enough arguments to fmt! " +
++
 +    /* 'ident' is the local buffer building up the result of fmt! */
 +    let ident = cx.parse_sess().interner.intern(@~"__fmtbuf");
 +    let buf = || mk_path(cx, fmt_sp, ~[ident]);
 +    let str_ident = cx.parse_sess().interner.intern(@~"str");
 +    let push_ident = cx.parse_sess().interner.intern(@~"push_str");
 +    let mut stms = ~[];
++
 +    /* Translate each piece (portion of the fmt expression) by invoking the
 +       corresponding function in core::unstable::extfmt. Each function takes a
 +       buffer to insert data into along with the data being formatted. */
 +    do vec::consume(pieces) |i, pc| {
 +        match pc {
 +            /* Raw strings get appended via str::push_str */
 +            PieceString(s) => {
 +                let portion = mk_uniq_str(cx, fmt_sp, s);
++
 +                /* If this is the first portion, then initialize the local
 +                   buffer with it directly */
 +                if i == 0 {
 +                    stms.push(mk_local(cx, fmt_sp, true, ident, portion));
 +                } else {
 +                    let args = ~[mk_mut_addr_of(cx, fmt_sp, buf()), portion];
 +                    let call = mk_call_global(cx,
 +                                              fmt_sp,
 +                                              ~[str_ident, push_ident],
 +                                              args);
 +                    stms.push(mk_stmt(cx, fmt_sp, call));
 +                }
 +            }
++
 +            /* Invoke the correct conv function in extfmt */
 +            PieceConv(ref conv) => {
 +                n += 1u;
 +                if n >= nargs {
 +                    cx.span_fatal(sp,
 +                                  ~"not enough arguments to fmt! " +
                                   ~"for the given format string");
 +                }
++
 +                log_conv(conv);
 +                /* If the first portion is a conversion, then the local buffer
 +                   must be initialized as an empty string */
 +                if i == 0 {
 +                    stms.push(mk_local(cx, fmt_sp, true, ident,
 +                                       mk_uniq_str(cx, fmt_sp, ~"")));
 +                }
 +                stms.push(mk_stmt(cx, fmt_sp,
 +                                  make_new_conv(cx, fmt_sp, conv,
 +                                                args[n], buf())));
+            }
 -            debug!("Building conversion:");
 -            log_conv(conv);
 -            let arg_expr = args[n];
 -            let c_expr = make_new_conv(
 -                cx,
 -                fmt_sp,
 -                conv,
 -                arg_expr
 -            );
 -            piece_exprs.push(c_expr);
 -          }
+        }
+    }
 -    let expected_nargs = n + 1u; // n conversions + the fmt string
 +    let expected_nargs = n + 1u; // n conversions + the fmt string
     if expected_nargs < nargs {
         cx.span_fatal
             (sp, fmt!("too many arguments to fmt!. found %u, expected %u",
                            nargs, expected_nargs));
+    }
 -    let arg_vec = mk_fixed_vec_e(cx, fmt_sp, piece_exprs);
 -    return mk_call_global(cx,
 -                          fmt_sp,
 -                          ~[cx.parse_sess().interner.intern(@~"str"),
 -                            cx.parse_sess().interner.intern(@~"concat")],
 -                          ~[arg_vec]);
 +    return mk_block(cx, fmt_sp, ~[], stms, Some(buf()));
+}
 //
 // Local Variables: