about summary refs log tree commit diff
path: root/src/libstd
diff options
context:
space:
mode:
authorAlex Crichton <alex@alexcrichton.com>2013-08-10 00:28:47 -0700
committerAlex Crichton <alex@alexcrichton.com>2013-08-12 23:18:51 -0700
commitb820748ff5a2bc09b58dd7ac511e0f607d55f2e9 (patch)
tree7a34e364ae23968eadba2e7bc0ecae8e20b3fced /src/libstd
parent44675ac6aff91889f960655b0034964740415e8c (diff)
downloadrust-b820748ff5a2bc09b58dd7ac511e0f607d55f2e9.tar.gz
rust-b820748ff5a2bc09b58dd7ac511e0f607d55f2e9.zip
Implement formatting arguments for strings and integers
Closes #1651
Diffstat (limited to 'src/libstd')
-rw-r--r--src/libstd/char.rs38
-rw-r--r--src/libstd/fmt/mod.rs167
-rw-r--r--src/libstd/str.rs53
-rw-r--r--src/libstd/unstable/raw.rs1
4 files changed, 196 insertions, 63 deletions
diff --git a/src/libstd/char.rs b/src/libstd/char.rs
index 9c55e22b1f8..a0635f6f12e 100644
--- a/src/libstd/char.rs
+++ b/src/libstd/char.rs
@@ -20,6 +20,15 @@ use unicode::{derived_property, general_category};
 #[cfg(not(test))] use cmp::{Eq, Ord};
 #[cfg(not(test))] use num::Zero;
 
+// UTF-8 ranges and tags for encoding characters
+static TAG_CONT: uint = 128u;
+static MAX_ONE_B: uint = 128u;
+static TAG_TWO_B: uint = 192u;
+static MAX_TWO_B: uint = 2048u;
+static TAG_THREE_B: uint = 224u;
+static MAX_THREE_B: uint = 65536u;
+static TAG_FOUR_B: uint = 240u;
+
 /*
     Lu  Uppercase_Letter        an uppercase letter
     Ll  Lowercase_Letter        a lowercase letter
@@ -278,6 +287,12 @@ pub trait Char {
     fn escape_unicode(&self, f: &fn(char));
     fn escape_default(&self, f: &fn(char));
     fn len_utf8_bytes(&self) -> uint;
+
+    /// Encodes this character as utf-8 into the provided byte-buffer. The
+    /// buffer must be at least 4 bytes long or a runtime failure will occur.
+    ///
+    /// This will then return the number of bytes written to the slice.
+    fn encode_utf8(&self, dst: &mut [u8]) -> uint;
 }
 
 impl Char for char {
@@ -308,6 +323,29 @@ impl Char for char {
     fn escape_default(&self, f: &fn(char)) { escape_default(*self, f) }
 
     fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }
+
+    fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint {
+        let code = *self as uint;
+        if code < MAX_ONE_B {
+            dst[0] = code as u8;
+            return 1;
+        } else if code < MAX_TWO_B {
+            dst[0] = (code >> 6u & 31u | TAG_TWO_B) as u8;
+            dst[1] = (code & 63u | TAG_CONT) as u8;
+            return 2;
+        } else if code < MAX_THREE_B {
+            dst[0] = (code >> 12u & 15u | TAG_THREE_B) as u8;
+            dst[1] = (code >> 6u & 63u | TAG_CONT) as u8;
+            dst[2] = (code & 63u | TAG_CONT) as u8;
+            return 3;
+        } else {
+            dst[0] = (code >> 18u & 7u | TAG_FOUR_B) as u8;
+            dst[1] = (code >> 12u & 63u | TAG_CONT) as u8;
+            dst[2] = (code >> 6u & 63u | TAG_CONT) as u8;
+            dst[3] = (code & 63u | TAG_CONT) as u8;
+            return 4;
+        }
+    }
 }
 
 #[cfg(not(test))]
diff --git a/src/libstd/fmt/mod.rs b/src/libstd/fmt/mod.rs
index 2b8807b2291..a25620cfa69 100644
--- a/src/libstd/fmt/mod.rs
+++ b/src/libstd/fmt/mod.rs
@@ -11,7 +11,7 @@
 use prelude::*;
 
 use cast;
-use int;
+use char::Char;
 use rt::io::Decorator;
 use rt::io::mem::MemWriter;
 use rt::io;
@@ -122,6 +122,11 @@ pub unsafe fn sprintf(fmt: &[rt::Piece], args: &[Argument]) -> ~str {
 }
 
 impl<'self> Formatter<'self> {
+
+    // First up is the collection of functions used to execute a format string
+    // at runtime. This consumes all of the compile-time statics generated by
+    // the ifmt! syntax extension.
+
     fn run(&mut self, piece: &rt::Piece, cur: Option<&str>) {
         let setcount = |slot: &mut Option<uint>, cnt: &parse::Count| {
             match *cnt {
@@ -240,6 +245,118 @@ impl<'self> Formatter<'self> {
             }
         }
     }
+
+    // Helper methods used for padding and processing formatting arguments that
+    // all formatting traits can use.
+
+    /// Emits the already-rendered digits of an integer to the internal buffer
+    /// together with its sign and optional prefix, padded out to the requested
+    /// width.
+    ///
+    /// * s - the bytes of the number's digits, without any sign
+    /// * alternate_prefix - written before the digits only when the alternate
+    ///                      flag is set (e.g. "0x")
+    /// * positive - whether the value is non-negative; when false a '-' is
+    ///              written, otherwise a '+' is written only if the sign-plus
+    ///              flag is set
+    ///
+    /// If a width is set and the emitted text is shorter than it, the text is
+    /// padded via `with_padding`.
+    pub fn pad_integral(&mut self, s: &[u8], alternate_prefix: &str,
+                        positive: bool) {
+        use fmt::parse::{FlagAlternate, FlagSignPlus};
+
+        let mut actual_len = s.len();
+        if self.flags & 1 << (FlagAlternate as uint) != 0 {
+            actual_len += alternate_prefix.len();
+        }
+        // At most one sign character is ever written by `emit` below, so it
+        // must be counted only once, even when a negative value is formatted
+        // with the sign-plus flag set.
+        if !positive || self.flags & 1 << (FlagSignPlus as uint) != 0 {
+            actual_len += 1;
+        }
+
+        let emit = |this: &mut Formatter| {
+            if this.flags & 1 << (FlagSignPlus as uint) != 0 && positive {
+                this.buf.write(['+' as u8]);
+            } else if !positive {
+                this.buf.write(['-' as u8]);
+            }
+            if this.flags & 1 << (FlagAlternate as uint) != 0 {
+                this.buf.write(alternate_prefix.as_bytes());
+            }
+            this.buf.write(s);
+        };
+
+        match self.width {
+            None => { emit(self) }
+            Some(min) if actual_len >= min => { emit(self) }
+            Some(min) => {
+                do self.with_padding(min - actual_len) |me| {
+                    emit(me);
+                }
+            }
+        }
+    }
+
+    /// This function takes a string slice and emits it to the internal buffer
+    /// after applying the relevant formatting flags specified. The flags
+    /// recognized for generic strings are:
+    ///
+    /// * width - the minimum width of what to emit
+    /// * fill/alignleft - what to emit and where to emit it if the string
+    ///                    provided needs to be padded
+    /// * precision - the maximum length to emit, the string is truncated if it
+    ///               is longer than this length
+    ///
+    /// Both width and precision are counted in characters rather than bytes.
+    /// A string that reaches the precision is written without any padding.
+    ///
+    /// Notably this function ignores the `flags` field.
+    pub fn pad(&mut self, s: &str) {
+        // Make sure there's a fast path up front
+        if self.width.is_none() && self.precision.is_none() {
+            self.buf.write(s.as_bytes());
+            return
+        }
+
+        // Every comparison and subtraction below uses this character count.
+        // Mixing it with the byte length (`s.len()`) would under-pad, or
+        // underflow the padding amount, for strings with multi-byte characters.
+        let char_len = s.char_len();
+
+        // The `precision` field can be interpreted as a `max-width` for the
+        // string being formatted
+        match self.precision {
+            Some(max) => {
+                // If there's a maximum width and our string is at least that
+                // long, then we emit exactly `max` characters. This is the
+                // only case where the maximum length will matter.
+                if char_len >= max {
+                    self.buf.write(s.slice_chars(0, max).as_bytes());
+                    return
+                }
+            }
+            None => {}
+        }
+
+        // The `width` field is more of a `min-width` parameter at this point.
+        match self.width {
+            // If we're under the maximum length, and there's no minimum length
+            // requirements, then we can just emit the string
+            None => { self.buf.write(s.as_bytes()) }
+
+            // If we're under the maximum width, check if we're over the minimum
+            // width, if so it's as easy as just emitting the string.
+            Some(width) if char_len >= width => {
+                self.buf.write(s.as_bytes())
+            }
+
+            // If we're under both the maximum and the minimum width, then fill
+            // up the minimum width with the specified string + some alignment.
+            Some(width) => {
+                do self.with_padding(width - char_len) |me| {
+                    me.buf.write(s.as_bytes());
+                }
+            }
+        }
+    }
+
+    fn with_padding(&mut self, padding: uint, f: &fn(&mut Formatter)) {
+        if self.alignleft {
+            f(self);
+        }
+        let mut fill = [0u8, ..4];
+        let len = self.fill.encode_utf8(fill);
+        for _ in range(0, padding) {
+            self.buf.write(fill.slice_to(len));
+        }
+        if !self.alignleft {
+            f(self);
+        }
+    }
 }
 
 /// This is a function which calls are emitted to by the compiler itself to
@@ -279,60 +396,53 @@ impl Bool for bool {
 
 impl<'self> String for &'self str {
     fn fmt(s: & &'self str, f: &mut Formatter) {
-        // XXX: formatting args
-        f.buf.write(s.as_bytes())
+        f.pad(*s);
     }
 }
 
 impl Char for char {
     fn fmt(c: &char, f: &mut Formatter) {
-        // XXX: formatting args
-        // XXX: shouldn't require an allocation
-        let mut s = ~"";
-        s.push_char(*c);
-        f.buf.write(s.as_bytes());
+        let mut utf8 = [0u8, ..4];
+        let amt = c.encode_utf8(utf8);
+        let s: &str = unsafe { cast::transmute(utf8.slice_to(amt)) };
+        String::fmt(&s, f);
     }
 }
 
 impl Signed for int {
     fn fmt(c: &int, f: &mut Formatter) {
-        // XXX: formatting args
-        do int::to_str_bytes(*c, 10) |buf| {
-            f.buf.write(buf);
+        do uint::to_str_bytes(c.abs() as uint, 10) |buf| {
+            f.pad_integral(buf, "", *c >= 0);
         }
     }
 }
 
 impl Unsigned for uint {
     fn fmt(c: &uint, f: &mut Formatter) {
-        // XXX: formatting args
         do uint::to_str_bytes(*c, 10) |buf| {
-            f.buf.write(buf);
+            f.pad_integral(buf, "", true);
         }
     }
 }
 
 impl Octal for uint {
     fn fmt(c: &uint, f: &mut Formatter) {
-        // XXX: formatting args
         do uint::to_str_bytes(*c, 8) |buf| {
-            f.buf.write(buf);
+            f.pad_integral(buf, "0o", true);
         }
     }
 }
 
 impl LowerHex for uint {
     fn fmt(c: &uint, f: &mut Formatter) {
-        // XXX: formatting args
         do uint::to_str_bytes(*c, 16) |buf| {
-            f.buf.write(buf);
+            f.pad_integral(buf, "0x", true);
         }
     }
 }
 
 impl UpperHex for uint {
     fn fmt(c: &uint, f: &mut Formatter) {
-        // XXX: formatting args
         do uint::to_str_bytes(*c, 16) |buf| {
             let mut local = [0u8, ..16];
             for (l, &b) in local.mut_iter().zip(buf.iter()) {
@@ -341,16 +451,29 @@ impl UpperHex for uint {
                     _ => b,
                 };
             }
-            f.buf.write(local.slice_to(buf.len()));
+            f.pad_integral(local.slice_to(buf.len()), "0x", true);
         }
     }
 }
 
 impl<T> Poly for T {
     fn fmt(t: &T, f: &mut Formatter) {
-        // XXX: formatting args
-        let s = sys::log_str(t);
-        f.buf.write(s.as_bytes());
+        match (f.width, f.precision) {
+            (None, None) => {
+                // XXX: sys::log_str should have a variant which takes a stream
+                //      and we should directly call that (avoids unnecessary
+                //      allocations)
+                let s = sys::log_str(t);
+                f.buf.write(s.as_bytes());
+            }
+
+            // If we have a specified width for formatting, then we have to make
+            // this allocation of a new string
+            _ => {
+                let s = sys::log_str(t);
+                f.pad(s);
+            }
+        }
     }
 }
 
diff --git a/src/libstd/str.rs b/src/libstd/str.rs
index 9e5f2db4092..10df45922b5 100644
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -33,6 +33,7 @@ use ptr;
 use ptr::RawPtr;
 use to_str::ToStr;
 use uint;
+use unstable::raw::{Repr, Slice};
 use vec;
 use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
 
@@ -758,15 +759,7 @@ macro_rules! utf8_acc_cont_byte(
     ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as uint)
 )
 
-// UTF-8 tags and ranges
 static TAG_CONT_U8: u8 = 128u8;
-static TAG_CONT: uint = 128u;
-static MAX_ONE_B: uint = 128u;
-static TAG_TWO_B: uint = 192u;
-static MAX_TWO_B: uint = 2048u;
-static TAG_THREE_B: uint = 224u;
-static MAX_THREE_B: uint = 65536u;
-static TAG_FOUR_B: uint = 240u;
 static MAX_UNICODE: uint = 1114112u;
 
 /// Unsafe operations
@@ -1988,40 +1981,18 @@ impl OwnedStr for ~str {
     #[inline]
     fn push_char(&mut self, c: char) {
         assert!((c as uint) < MAX_UNICODE); // FIXME: #7609: should be enforced on all `char`
+        let cur_len = self.len();
+        self.reserve_at_least(cur_len + 4); // may use up to 4 bytes
+
+        // Attempt to not use an intermediate buffer by just pushing bytes
+        // directly onto this string.
         unsafe {
-            let code = c as uint;
-            let nb = if code < MAX_ONE_B { 1u }
-            else if code < MAX_TWO_B { 2u }
-            else if code < MAX_THREE_B { 3u }
-            else { 4u };
-            let len = self.len();
-            let new_len = len + nb;
-            self.reserve_at_least(new_len);
-            let off = len as int;
-            do self.as_mut_buf |buf, _len| {
-                match nb {
-                    1u => {
-                        *ptr::mut_offset(buf, off) = code as u8;
-                    }
-                    2u => {
-                        *ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8;
-                        *ptr::mut_offset(buf, off + 1) = (code & 63u | TAG_CONT) as u8;
-                    }
-                    3u => {
-                        *ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8;
-                        *ptr::mut_offset(buf, off + 1) = (code >> 6u & 63u | TAG_CONT) as u8;
-                        *ptr::mut_offset(buf, off + 2) = (code & 63u | TAG_CONT) as u8;
-                    }
-                    4u => {
-                        *ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8;
-                        *ptr::mut_offset(buf, off + 1) = (code >> 12u & 63u | TAG_CONT) as u8;
-                        *ptr::mut_offset(buf, off + 2) = (code >> 6u & 63u | TAG_CONT) as u8;
-                        *ptr::mut_offset(buf, off + 3) = (code & 63u | TAG_CONT) as u8;
-                    }
-                    _ => {}
-                }
-            }
-            raw::set_len(self, new_len);
+            let v = self.repr();
+            let len = c.encode_utf8(cast::transmute(Slice {
+                data: ((&(*v).data) as *u8).offset(cur_len as int),
+                len: 4,
+            }));
+            raw::set_len(self, cur_len + len);
         }
     }
 
diff --git a/src/libstd/unstable/raw.rs b/src/libstd/unstable/raw.rs
index 0e074b53d6b..bdf84604fb3 100644
--- a/src/libstd/unstable/raw.rs
+++ b/src/libstd/unstable/raw.rs
@@ -56,6 +56,7 @@ impl<'self, T> Repr<Slice<T>> for &'self [T] {}
 impl<'self> Repr<Slice<u8>> for &'self str {}
 impl<T> Repr<*Box<T>> for @T {}
 impl<T> Repr<*Box<Vec<T>>> for @[T] {}
+impl Repr<*String> for ~str {}
 
 // sure would be nice to have this
 // impl<T> Repr<*Vec<T>> for ~[T] {}