about summary refs log tree commit diff
path: root/src/libserialize/base64.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/libserialize/base64.rs')
-rw-r--r--src/libserialize/base64.rs367
1 files changed, 367 insertions, 0 deletions
diff --git a/src/libserialize/base64.rs b/src/libserialize/base64.rs
new file mode 100644
index 00000000000..c22eefdb330
--- /dev/null
+++ b/src/libserialize/base64.rs
@@ -0,0 +1,367 @@
+// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Base64 binary-to-text encoding
+use std::str;
+
+/// Available encoding character sets
+pub enum CharacterSet {
+    /// The standard character set (uses `+` and `/`)
+    Standard,
+    /// The URL safe character set (uses `-` and `_`)
+    UrlSafe
+}
+
+/// Contains configuration parameters for `to_base64`.
+pub struct Config {
+    /// Character set to use
+    char_set: CharacterSet,
+    /// True to pad output with `=` characters
+    pad: bool,
+    /// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping
+    line_length: Option<uint>
+}
+
+/// Configuration for RFC 4648 standard base64 encoding
+pub static STANDARD: Config =
+    Config {char_set: Standard, pad: true, line_length: None};
+
+/// Configuration for RFC 4648 base64url encoding
+pub static URL_SAFE: Config =
+    Config {char_set: UrlSafe, pad: false, line_length: None};
+
+/// Configuration for RFC 2045 MIME base64 encoding
+pub static MIME: Config =
+    Config {char_set: Standard, pad: true, line_length: Some(76)};
+
+static STANDARD_CHARS: &'static[u8] = bytes!("ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+                                             "abcdefghijklmnopqrstuvwxyz",
+                                             "0123456789+/");
+
+static URLSAFE_CHARS: &'static[u8] = bytes!("ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+                                            "abcdefghijklmnopqrstuvwxyz",
+                                            "0123456789-_");
+
+/// A trait for converting a value to base64 encoding.
+pub trait ToBase64 {
+    /// Converts the value of `self` to a base64 value following the specified
+    /// format configuration, returning the owned string.
+    fn to_base64(&self, config: Config) -> ~str;
+}
+
+impl<'a> ToBase64 for &'a [u8] {
+    /**
+     * Turn a vector of `u8` bytes into a base64 string.
+     *
+     * # Example
+     *
+     * ```rust
+     * extern mod serialize;
+     * use serialize::base64::{ToBase64, STANDARD};
+     *
+     * fn main () {
+     *     let str = [52,32].to_base64(STANDARD);
+     *     println!("base 64 output: {}", str);
+     * }
+     * ```
+     */
+    fn to_base64(&self, config: Config) -> ~str {
+        let bytes = match config.char_set {
+            Standard => STANDARD_CHARS,
+            UrlSafe => URLSAFE_CHARS
+        };
+
+        let mut v: ~[u8] = ~[];
+        let mut i = 0;
+        let mut cur_length = 0;
+        let len = self.len();
+        while i < len - (len % 3) {
+            match config.line_length {
+                Some(line_length) =>
+                    if cur_length >= line_length {
+                        v.push('\r' as u8);
+                        v.push('\n' as u8);
+                        cur_length = 0;
+                    },
+                None => ()
+            }
+
+            let n = (self[i] as u32) << 16 |
+                    (self[i + 1] as u32) << 8 |
+                    (self[i + 2] as u32);
+
+            // This 24-bit number gets separated into four 6-bit numbers.
+            v.push(bytes[(n >> 18) & 63]);
+            v.push(bytes[(n >> 12) & 63]);
+            v.push(bytes[(n >> 6 ) & 63]);
+            v.push(bytes[n & 63]);
+
+            cur_length += 4;
+            i += 3;
+        }
+
+        if len % 3 != 0 {
+            match config.line_length {
+                Some(line_length) =>
+                    if cur_length >= line_length {
+                        v.push('\r' as u8);
+                        v.push('\n' as u8);
+                    },
+                None => ()
+            }
+        }
+
+        // Heh, would be cool if we knew this was exhaustive
+        // (the dream of bounded integer types)
+        match len % 3 {
+            0 => (),
+            1 => {
+                let n = (self[i] as u32) << 16;
+                v.push(bytes[(n >> 18) & 63]);
+                v.push(bytes[(n >> 12) & 63]);
+                if config.pad {
+                    v.push('=' as u8);
+                    v.push('=' as u8);
+                }
+            }
+            2 => {
+                let n = (self[i] as u32) << 16 |
+                    (self[i + 1u] as u32) << 8;
+                v.push(bytes[(n >> 18) & 63]);
+                v.push(bytes[(n >> 12) & 63]);
+                v.push(bytes[(n >> 6 ) & 63]);
+                if config.pad {
+                    v.push('=' as u8);
+                }
+            }
+            _ => fail!("Algebra is broken, please alert the math police")
+        }
+
+        unsafe {
+            str::raw::from_utf8_owned(v)
+        }
+    }
+}
+
+/// A trait for converting from base64 encoded values.
+pub trait FromBase64 {
+    /// Converts the value of `self`, interpreted as base64 encoded data, into
+    /// an owned vector of bytes, returning the vector.
+    fn from_base64(&self) -> Result<~[u8], FromBase64Error>;
+}
+
+/// Errors that can occur when decoding a base64 encoded string
+pub enum FromBase64Error {
+    /// The input contained a character not part of the base64 format
+    InvalidBase64Character(char, uint),
+    /// The input had an invalid length
+    InvalidBase64Length,
+}
+
+impl ToStr for FromBase64Error {
+    fn to_str(&self) -> ~str {
+        match *self {
+            InvalidBase64Character(ch, idx) =>
+                format!("Invalid character '{}' at position {}", ch, idx),
+            InvalidBase64Length => ~"Invalid length",
+        }
+    }
+}
+
+impl<'a> FromBase64 for &'a str {
+    /**
+     * Convert any base64 encoded string (literal, `@`, `&`, or `~`)
+     * to the byte values it encodes.
+     *
+     * You can use the `from_utf8_owned` function in `std::str`
+     * to turn a `[u8]` into a string with characters corresponding to those
+     * values.
+     *
+     * # Example
+     *
+     * This converts a string literal to base64 and back.
+     *
+     * ```rust
+     * extern mod serialize;
+     * use serialize::base64::{ToBase64, FromBase64, STANDARD};
+     * use std::str;
+     *
+     * fn main () {
+     *     let hello_str = bytes!("Hello, World").to_base64(STANDARD);
+     *     println!("base64 output: {}", hello_str);
+     *     let res = hello_str.from_base64();
+     *     if res.is_ok() {
+     *       let optBytes = str::from_utf8_owned(res.unwrap());
+     *       if optBytes.is_some() {
+     *         println!("decoded from base64: {}", optBytes.unwrap());
+     *       }
+     *     }
+     * }
+     * ```
+     */
+    fn from_base64(&self) -> Result<~[u8], FromBase64Error> {
+        let mut r = ~[];
+        let mut buf: u32 = 0;
+        let mut modulus = 0;
+
+        let mut it = self.bytes().enumerate();
+        for (idx, byte) in it {
+            let val = byte as u32;
+
+            match byte as char {
+                'A'..'Z' => buf |= val - 0x41,
+                'a'..'z' => buf |= val - 0x47,
+                '0'..'9' => buf |= val + 0x04,
+                '+'|'-' => buf |= 0x3E,
+                '/'|'_' => buf |= 0x3F,
+                '\r'|'\n' => continue,
+                '=' => break,
+                _ => return Err(InvalidBase64Character(self.char_at(idx), idx)),
+            }
+
+            buf <<= 6;
+            modulus += 1;
+            if modulus == 4 {
+                modulus = 0;
+                r.push((buf >> 22) as u8);
+                r.push((buf >> 14) as u8);
+                r.push((buf >> 6 ) as u8);
+            }
+        }
+
+        for (idx, byte) in it {
+            match byte as char {
+                '='|'\r'|'\n' => continue,
+                _ => return Err(InvalidBase64Character(self.char_at(idx), idx)),
+            }
+        }
+
+        match modulus {
+            2 => {
+                r.push((buf >> 10) as u8);
+            }
+            3 => {
+                r.push((buf >> 16) as u8);
+                r.push((buf >> 8 ) as u8);
+            }
+            0 => (),
+            _ => return Err(InvalidBase64Length),
+        }
+
+        Ok(r)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use extra::test::BenchHarness;
+    use base64::{Config, FromBase64, ToBase64, STANDARD, URL_SAFE};
+
+    #[test]
+    fn test_to_base64_basic() {
+        assert_eq!("".as_bytes().to_base64(STANDARD), ~"");
+        assert_eq!("f".as_bytes().to_base64(STANDARD), ~"Zg==");
+        assert_eq!("fo".as_bytes().to_base64(STANDARD), ~"Zm8=");
+        assert_eq!("foo".as_bytes().to_base64(STANDARD), ~"Zm9v");
+        assert_eq!("foob".as_bytes().to_base64(STANDARD), ~"Zm9vYg==");
+        assert_eq!("fooba".as_bytes().to_base64(STANDARD), ~"Zm9vYmE=");
+        assert_eq!("foobar".as_bytes().to_base64(STANDARD), ~"Zm9vYmFy");
+    }
+
+    #[test]
+    fn test_to_base64_line_break() {
+        assert!(![0u8, ..1000].to_base64(Config {line_length: None, ..STANDARD})
+                .contains("\r\n"));
+        assert_eq!("foobar".as_bytes().to_base64(Config {line_length: Some(4),
+                                                         ..STANDARD}),
+                   ~"Zm9v\r\nYmFy");
+    }
+
+    #[test]
+    fn test_to_base64_padding() {
+        assert_eq!("f".as_bytes().to_base64(Config {pad: false, ..STANDARD}), ~"Zg");
+        assert_eq!("fo".as_bytes().to_base64(Config {pad: false, ..STANDARD}), ~"Zm8");
+    }
+
+    #[test]
+    fn test_to_base64_url_safe() {
+        assert_eq!([251, 255].to_base64(URL_SAFE), ~"-_8");
+        assert_eq!([251, 255].to_base64(STANDARD), ~"+/8=");
+    }
+
+    #[test]
+    fn test_from_base64_basic() {
+        assert_eq!("".from_base64().unwrap(), "".as_bytes().to_owned());
+        assert_eq!("Zg==".from_base64().unwrap(), "f".as_bytes().to_owned());
+        assert_eq!("Zm8=".from_base64().unwrap(), "fo".as_bytes().to_owned());
+        assert_eq!("Zm9v".from_base64().unwrap(), "foo".as_bytes().to_owned());
+        assert_eq!("Zm9vYg==".from_base64().unwrap(), "foob".as_bytes().to_owned());
+        assert_eq!("Zm9vYmE=".from_base64().unwrap(), "fooba".as_bytes().to_owned());
+        assert_eq!("Zm9vYmFy".from_base64().unwrap(), "foobar".as_bytes().to_owned());
+    }
+
+    #[test]
+    fn test_from_base64_newlines() {
+        assert_eq!("Zm9v\r\nYmFy".from_base64().unwrap(),
+                   "foobar".as_bytes().to_owned());
+        assert_eq!("Zm9vYg==\r\n".from_base64().unwrap(),
+                   "foob".as_bytes().to_owned());
+    }
+
+    #[test]
+    fn test_from_base64_urlsafe() {
+        assert_eq!("-_8".from_base64().unwrap(), "+/8=".from_base64().unwrap());
+    }
+
+    #[test]
+    fn test_from_base64_invalid_char() {
+        assert!("Zm$=".from_base64().is_err())
+        assert!("Zg==$".from_base64().is_err());
+    }
+
+    #[test]
+    fn test_from_base64_invalid_padding() {
+        assert!("Z===".from_base64().is_err());
+    }
+
+    #[test]
+    fn test_base64_random() {
+        use std::rand::{task_rng, random, Rng};
+        use std::vec;
+
+        for _ in range(0, 1000) {
+            let times = task_rng().gen_range(1u, 100);
+            let v = vec::from_fn(times, |_| random::<u8>());
+            assert_eq!(v.to_base64(STANDARD).from_base64().unwrap(), v);
+        }
+    }
+
+    #[bench]
+    pub fn bench_to_base64(bh: & mut BenchHarness) {
+        let s = "イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム \
+                 ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン";
+        bh.iter(|| {
+            s.as_bytes().to_base64(STANDARD);
+        });
+        bh.bytes = s.len() as u64;
+    }
+
+    #[bench]
+    pub fn bench_from_base64(bh: & mut BenchHarness) {
+        let s = "イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム \
+                 ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン";
+        let b = s.as_bytes().to_base64(STANDARD);
+        bh.iter(|| {
+            b.from_base64().unwrap();
+        });
+        bh.bytes = b.len() as u64;
+    }
+
+}