auto merge of #11498 : c-a/rust/optimize_vuint_at, r=alexcrichton

Use a lookup table, SHIFT_MASK_TABLE, that for every possible four bit prefix holds the number of times the value should be right shifted and what the right shifted value should be masked with. This way we can get rid of the branches which in my testing gives approximately a 2x speedup. Timings on Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz -- Before -- running 5 tests test ebml::tests::test_vuint_at ... ok test ebml::bench::vuint_at_A_aligned ... bench: 494 ns/iter (+/- 3) test ebml::bench::vuint_at_A_unaligned ... bench: 494 ns/iter (+/- 4) test ebml::bench::vuint_at_D_aligned ... bench: 467 ns/iter (+/- 5) test ebml::bench::vuint_at_D_unaligned ... bench: 467 ns/iter (+/- 5) -- After -- running 5 tests test ebml::tests::test_vuint_at ... ok test ebml::bench::vuint_at_A_aligned ... bench: 181 ns/iter (+/- 2) test ebml::bench::vuint_at_A_unaligned ... bench: 192 ns/iter (+/- 1) test ebml::bench::vuint_at_D_aligned ... bench: 181 ns/iter (+/- 3) test ebml::bench::vuint_at_D_unaligned ... bench: 197 ns/iter (+/- 6)
author: bors <bors@rust-lang.org> 2014-01-17 00:01:56 -0800
committer: bors <bors@rust-lang.org> 2014-01-17 00:01:56 -0800
commit: 93fb12e3d0644f6a8ddfa2ac1d6b0a1d8341e287 (patch)
tree: 172385992ea44c1eeb621c3905f51a7838401f2a
parent: 5fdc81262a5d44f10e335384b5d69b938d6d729c (diff)
parent: f4c9ed42aa1f5b83aa2f0ee3fbb5a89919d208d4 (diff)
download: rust-93fb12e3d0644f6a8ddfa2ac1d6b0a1d8341e287.tar.gz
rust-93fb12e3d0644f6a8ddfa2ac1d6b0a1d8341e287.zip
1 files changed, 79 insertions, 23 deletions
diff --git a/src/libextra/ebml.rs b/src/libextra/ebml.rs
index 3798ed8617e..a927c3b02bc 100644
--- a/src/libextra/ebml.rs
+++ b/src/libextra/ebml.rs
@@ -90,7 +90,7 @@ pub mod reader {
 
     // ebml reading
 
-    struct Res {
+    pub struct Res {
         val: uint,
         next: uint
     }
@@ -130,32 +130,40 @@ pub mod reader {
             return vuint_at_slow(data, start);
         }
 
+        // Lookup table for parsing EBML Element IDs as per http://ebml.sourceforge.net/specs/
+        // The Element IDs are parsed by reading a big endian u32 positioned at data[start].
+        // Using the four most significant bits of the u32 we lookup in the table below how the
+        // element ID should be derived from it.
+        //
+        // The table stores tuples (shift, mask) where shift is the number the u32 should be right
+        // shifted with and mask is the value the right shifted value should be masked with.
+        // If for example the most significant bit is set this means it's a class A ID and the u32
+        // should be right shifted with 24 and masked with 0x7f. Therefore we store (24, 0x7f) at
+        // index 0x8 - 0xF (four bit numbers where the most significant bit is set).
+        //
+        // By storing the number of shifts and masks in a table instead of checking in order if
+        // the most significant bit is set, the second most significant bit is set etc. we can
+        // replace up to three "and+branch" with a single table lookup which gives us a measured
+        // speedup of around 2x on x86_64.
+        static SHIFT_MASK_TABLE: [(u32, u32), ..16] = [
+            (0, 0x0), (0, 0x0fffffff),
+            (8, 0x1fffff), (8, 0x1fffff),
+            (16, 0x3fff), (16, 0x3fff), (16, 0x3fff), (16, 0x3fff),
+            (24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f),
+            (24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f)
+        ];
+
         unsafe {
             let (ptr, _): (*u8, uint) = transmute(data);
             let ptr = offset(ptr, start as int);
             let ptr: *i32 = transmute(ptr);
-            let val = from_be32(*ptr);
-            let val: u32 = transmute(val);
-            if (val & 0x80000000) != 0 {
-                Res {
-                    val: ((val >> 24) & 0x7f) as uint,
-                    next: start + 1
-                }
-            } else if (val & 0x40000000) != 0 {
-                Res {
-                    val: ((val >> 16) & 0x3fff) as uint,
-                    next: start + 2
-                }
-            } else if (val & 0x20000000) != 0 {
-                Res {
-                    val: ((val >> 8) & 0x1fffff) as uint,
-                    next: start + 3
-                }
-            } else {
-                Res {
-                    val: (val & 0x0fffffff) as uint,
-                    next: start + 4
-                }
+            let val = from_be32(*ptr) as u32;
+
+            let i = (val >> 28u) as uint;
+            let (shift, mask) = SHIFT_MASK_TABLE[i];
+            Res {
+                val: ((val >> shift) & mask) as uint,
+                next: start + (((32 - shift) >> 3) as uint)
             }
         }
     }
@@ -939,6 +947,54 @@ mod tests {
     use std::option::{None, Option, Some};
 
     #[test]
+    fn test_vuint_at() {
+        let data = [
+            0x80,
+            0xff,
+            0x40, 0x00,
+            0x7f, 0xff,
+            0x20, 0x00, 0x00,
+            0x3f, 0xff, 0xff,
+            0x10, 0x00, 0x00, 0x00,
+            0x1f, 0xff, 0xff, 0xff
+        ];
+
+        let mut res: reader::Res;
+
+        // Class A
+        res = reader::vuint_at(data, 0);
+        assert_eq!(res.val, 0);
+        assert_eq!(res.next, 1);
+        res = reader::vuint_at(data, res.next);
+        assert_eq!(res.val, (1 << 7) - 1);
+        assert_eq!(res.next, 2);
+
+        // Class B
+        res = reader::vuint_at(data, res.next);
+        assert_eq!(res.val, 0);
+        assert_eq!(res.next, 4);
+        res = reader::vuint_at(data, res.next);
+        assert_eq!(res.val, (1 << 14) - 1);
+        assert_eq!(res.next, 6);
+
+        // Class C
+        res = reader::vuint_at(data, res.next);
+        assert_eq!(res.val, 0);
+        assert_eq!(res.next, 9);
+        res = reader::vuint_at(data, res.next);
+        assert_eq!(res.val, (1 << 21) - 1);
+        assert_eq!(res.next, 12);
+
+        // Class D
+        res = reader::vuint_at(data, res.next);
+        assert_eq!(res.val, 0);
+        assert_eq!(res.next, 16);
+        res = reader::vuint_at(data, res.next);
+        assert_eq!(res.val, (1 << 28) - 1);
+        assert_eq!(res.next, 20);
+    }
+
+    #[test]
     fn test_option_int() {
         fn test_v(v: Option<int>) {
             debug!("v == {:?}", v);
author	bors <bors@rust-lang.org>	2014-01-17 00:01:56 -0800
committer	bors <bors@rust-lang.org>	2014-01-17 00:01:56 -0800
commit	93fb12e3d0644f6a8ddfa2ac1d6b0a1d8341e287 (patch)
tree	172385992ea44c1eeb621c3905f51a7838401f2a
parent	5fdc81262a5d44f10e335384b5d69b938d6d729c (diff)
parent	f4c9ed42aa1f5b83aa2f0ee3fbb5a89919d208d4 (diff)
download	rust-93fb12e3d0644f6a8ddfa2ac1d6b0a1d8341e287.tar.gz rust-93fb12e3d0644f6a8ddfa2ac1d6b0a1d8341e287.zip