about summary refs log tree commit diff
diff options
context:
space:
mode:
author Alex Crichton <alex@alexcrichton.com> 2014-12-09 09:24:50 -0800
committer Alex Crichton <alex@alexcrichton.com> 2014-12-09 09:24:50 -0800
commit d0ad3c7f933d575d3700d8f5124e5474dfcdbd63 (patch)
tree f5acdb280d284d875a7d36d84a625a480f73707e
parent ae60f9c59289adf8e46b2b3152f618caffc74bf4 (diff)
parent a119ad83c73c7e1c99c7e21fb2ab21bd9521077a (diff)
download rust-d0ad3c7f933d575d3700d8f5124e5474dfcdbd63.tar.gz
rust-d0ad3c7f933d575d3700d8f5124e5474dfcdbd63.zip
rollup merge of #19594: Arcterus/master
It is useful to have configurable newlines in base64 as the standard
leaves that for the implementation to decide.  GNU `base64` apparently
uses LF, which meant in `uutils` we had to manually convert the CRLF to
LF.  This made the program very slow for large inputs.

[breaking-change]
-rw-r--r-- src/libserialize/base64.rs 94
-rw-r--r-- src/libserialize/lib.rs 2
2 files changed, 64 insertions, 32 deletions
diff --git a/src/libserialize/base64.rs b/src/libserialize/base64.rs
index dd5039c9b82..59faf75c0c3 100644
--- a/src/libserialize/base64.rs
+++ b/src/libserialize/base64.rs
@@ -1,4 +1,4 @@
-// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -28,10 +28,22 @@ pub enum CharacterSet {
 
 impl Copy for CharacterSet {}
 
+/// Available newline types
+pub enum Newline {
+    /// A linefeed (i.e. Unix-style newline)
+    LF,
+    /// A carriage return and a linefeed (i.e. Windows-style newline)
+    CRLF
+}
+
+impl Copy for Newline {}
+
 /// Contains configuration parameters for `to_base64`.
 pub struct Config {
     /// Character set to use
     pub char_set: CharacterSet,
+    /// Newline to use
+    pub newline: Newline,
     /// True to pad output with `=` characters
     pub pad: bool,
     /// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping
@@ -42,15 +54,15 @@ impl Copy for Config {}
 
 /// Configuration for RFC 4648 standard base64 encoding
 pub static STANDARD: Config =
-    Config {char_set: Standard, pad: true, line_length: None};
+    Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: None};
 
 /// Configuration for RFC 4648 base64url encoding
 pub static URL_SAFE: Config =
-    Config {char_set: UrlSafe, pad: false, line_length: None};
+    Config {char_set: UrlSafe, newline: Newline::CRLF, pad: false, line_length: None};
 
 /// Configuration for RFC 2045 MIME base64 encoding
 pub static MIME: Config =
-    Config {char_set: Standard, pad: true, line_length: Some(76)};
+    Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: Some(76)};
 
 static STANDARD_CHARS: &'static[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
                                         abcdefghijklmnopqrstuvwxyz\
@@ -87,24 +99,30 @@ impl ToBase64 for [u8] {
             UrlSafe => URLSAFE_CHARS
         };
 
-        let mut v = Vec::new();
+        // In general, this Vec only needs (4/3) * self.len() memory, but
+        // addition is faster than multiplication and division.
+        let mut v = Vec::with_capacity(self.len() + self.len());
         let mut i = 0;
         let mut cur_length = 0;
         let len = self.len();
-        while i < len - (len % 3) {
-            match config.line_length {
-                Some(line_length) =>
-                    if cur_length >= line_length {
-                        v.push(b'\r');
-                        v.push(b'\n');
-                        cur_length = 0;
-                    },
-                None => ()
+        let mod_len = len % 3;
+        let cond_len = len - mod_len;
+        let newline = match config.newline {
+            Newline::LF => b"\n",
+            Newline::CRLF => b"\r\n"
+        };
+        while i < cond_len {
+            let (first, second, third) = (self[i], self[i + 1], self[i + 2]);
+            if let Some(line_length) = config.line_length {
+                if cur_length >= line_length {
+                    v.push_all(newline);
+                    cur_length = 0;
+                }
             }
 
-            let n = (self[i] as u32) << 16 |
-                    (self[i + 1] as u32) << 8 |
-                    (self[i + 2] as u32);
+            let n = (first  as u32) << 16 |
+                    (second as u32) << 8 |
+                    (third  as u32);
 
             // This 24-bit number gets separated into four 6-bit numbers.
             v.push(bytes[((n >> 18) & 63) as uint]);
@@ -116,20 +134,17 @@ impl ToBase64 for [u8] {
             i += 3;
         }
 
-        if len % 3 != 0 {
-            match config.line_length {
-                Some(line_length) =>
-                    if cur_length >= line_length {
-                        v.push(b'\r');
-                        v.push(b'\n');
-                    },
-                None => ()
+        if mod_len != 0 {
+            if let Some(line_length) = config.line_length {
+                if cur_length >= line_length {
+                    v.push_all(newline);
+                }
             }
         }
 
         // Heh, would be cool if we knew this was exhaustive
         // (the dream of bounded integer types)
-        match len % 3 {
+        match mod_len {
             0 => (),
             1 => {
                 let n = (self[i] as u32) << 16;
@@ -232,7 +247,7 @@ impl FromBase64 for str {
 
 impl FromBase64 for [u8] {
     fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> {
-        let mut r = Vec::new();
+        let mut r = Vec::with_capacity(self.len());
         let mut buf: u32 = 0;
         let mut modulus = 0i;
 
@@ -288,7 +303,7 @@ impl FromBase64 for [u8] {
 mod tests {
     extern crate test;
     use self::test::Bencher;
-    use base64::{Config, FromBase64, ToBase64, STANDARD, URL_SAFE};
+    use base64::{Config, Newline, FromBase64, ToBase64, STANDARD, URL_SAFE};
 
     #[test]
     fn test_to_base64_basic() {
@@ -302,15 +317,28 @@ mod tests {
     }
 
     #[test]
-    fn test_to_base64_line_break() {
+    fn test_to_base64_crlf_line_break() {
         assert!(![0u8, ..1000].to_base64(Config {line_length: None, ..STANDARD})
                               .contains("\r\n"));
-        assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4),
-                                                         ..STANDARD}),
+        assert_eq!(b"foobar".to_base64(Config {line_length: Some(4),
+                                               ..STANDARD}),
                    "Zm9v\r\nYmFy");
     }
 
     #[test]
+    fn test_to_base64_lf_line_break() {
+        assert!(![0u8, ..1000].to_base64(Config {line_length: None,
+                                                 newline: Newline::LF,
+                                                 ..STANDARD})
+                              .as_slice()
+                              .contains("\n"));
+        assert_eq!(b"foobar".to_base64(Config {line_length: Some(4),
+                                               newline: Newline::LF,
+                                               ..STANDARD}),
+                   "Zm9v\nYmFy");
+    }
+
+    #[test]
     fn test_to_base64_padding() {
         assert_eq!("f".as_bytes().to_base64(Config {pad: false, ..STANDARD}), "Zg");
         assert_eq!("fo".as_bytes().to_base64(Config {pad: false, ..STANDARD}), "Zm8");
@@ -344,6 +372,10 @@ mod tests {
                    b"foobar");
         assert_eq!("Zm9vYg==\r\n".from_base64().unwrap(),
                    b"foob");
+        assert_eq!("Zm9v\nYmFy".from_base64().unwrap(),
+                   b"foobar");
+        assert_eq!("Zm9vYg==\n".from_base64().unwrap(),
+                   b"foob");
     }
 
     #[test]
diff --git a/src/libserialize/lib.rs b/src/libserialize/lib.rs
index 9711d5c7209..1cff4c334e7 100644
--- a/src/libserialize/lib.rs
+++ b/src/libserialize/lib.rs
@@ -23,7 +23,7 @@ Core encoding and decoding interfaces.
        html_root_url = "http://doc.rust-lang.org/nightly/",
        html_playground_url = "http://play.rust-lang.org/")]
 #![allow(unknown_features)]
-#![feature(macro_rules, default_type_params, phase, slicing_syntax, globs)]
+#![feature(macro_rules, default_type_params, phase, slicing_syntax, globs, if_let)]
 
 // test harness access
 #[cfg(test)]