about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2016-10-22 06:37:30 -0700
committerGitHub <noreply@github.com>2016-10-22 06:37:30 -0700
commitb5f6d7ec2d4e231b9ef0c8a9e8e7ec8a7f67d2ae (patch)
tree0a7acb4a9bbeef0fde0b8be462f15d75e9ec9e32
parent4879166194ed63ebd2a8c3ce8db1ccde4a6a1920 (diff)
parent94771a177ba133bfadd0ffba55d82e7e3989ea06 (diff)
downloadrust-b5f6d7ec2d4e231b9ef0c8a9e8e7ec8a7f67d2ae.tar.gz
rust-b5f6d7ec2d4e231b9ef0c8a9e8e7ec8a7f67d2ae.zip
Auto merge of #37298 - nnethercote:faster-deflate, r=alexcrichton
Use a faster `deflate` setting

In #37086 we have considered various ideas for reducing the cost of LLVM bytecode compression. This PR implements the simplest of these: use a faster `deflate` setting. It's very simple and reduces the compression time by almost half while increasing the size of the resulting rlibs by only about 2%.

I looked at using zstd, which might be able to halve the compression time again. But integrating zstd is beyond my Rust FFI integration abilities at the moment -- it consists of a few dozen C files, has a non-trivial build system, etc. I decided it was worth getting a big chunk of the possible improvement with minimum effort.

The following table shows the before and after percentages of instructions executed during compression while doing debug builds of some of the rustc-benchmarks with a stage1 compiler.
```
html5ever-2016-08-25      1.4% -> 0.7%
hyper.0.5.0               3.8% -> 2.4%
inflate-0.1.0             1.0% -> 0.5%
piston-image-0.10.3       2.9% -> 1.8%
regex.0.1.30              3.4% -> 2.1%
rust-encoding-0.3.0       4.8% -> 2.9%
syntex-0.42.2             2.9% -> 1.8%
syntex-0.42.2-incr-clean 14.2% -> 8.9%
```
The omitted ones spend 0% of their time in decompression.

And here are actual timings:
```
futures-rs-test  4.110s vs  4.102s --> 1.002x faster (variance: 1.017x, 1.004x)
helloworld       0.223s vs  0.226s --> 0.986x faster (variance: 1.012x, 1.022x)
html5ever-2016-  4.218s vs  4.186s --> 1.008x faster (variance: 1.008x, 1.010x)
hyper.0.5.0      4.746s vs  4.661s --> 1.018x faster (variance: 1.002x, 1.016x)
inflate-0.1.0    4.194s vs  4.143s --> 1.012x faster (variance: 1.007x, 1.006x)
issue-32062-equ  0.317s vs  0.316s --> 1.001x faster (variance: 1.013x, 1.005x)
issue-32278-big  1.811s vs  1.825s --> 0.992x faster (variance: 1.014x, 1.006x)
jld-day15-parse  1.412s vs  1.412s --> 1.001x faster (variance: 1.019x, 1.008x)
piston-image-0. 11.058s vs 10.977s --> 1.007x faster (variance: 1.008x, 1.039x)
reddit-stress    2.331s vs  2.342s --> 0.995x faster (variance: 1.019x, 1.006x)
regex.0.1.30     2.294s vs  2.276s --> 1.008x faster (variance: 1.007x, 1.007x)
rust-encoding-0  1.963s vs  1.924s --> 1.020x faster (variance: 1.009x, 1.006x)
syntex-0.42.2   29.667s vs 29.391s --> 1.009x faster (variance: 1.002x, 1.023x)
syntex-0.42.2-i 15.257s vs 14.148s --> 1.078x faster (variance: 1.018x, 1.008x)
```

r? @alexcrichton
-rw-r--r--src/libflate/lib.rs35
1 files changed, 10 insertions, 25 deletions
diff --git a/src/libflate/lib.rs b/src/libflate/lib.rs
index 63913f2878c..3c608ef9c92 100644
--- a/src/libflate/lib.rs
+++ b/src/libflate/lib.rs
@@ -94,11 +94,14 @@ extern "C" {
                                     -> *mut c_void;
 }
 
-const LZ_NORM: c_int = 0x80;  // LZ with 128 probes, "normal"
-const TINFL_FLAG_PARSE_ZLIB_HEADER: c_int = 0x1; // parse zlib header and adler32 checksum
-const TDEFL_WRITE_ZLIB_HEADER: c_int = 0x01000; // write zlib header and adler32 checksum
+const LZ_FAST: c_int = 0x01;  // LZ with 1 probe, "fast"
+const TDEFL_GREEDY_PARSING_FLAG: c_int = 0x04000; // fast greedy parsing instead of lazy parsing
 
-fn deflate_bytes_internal(bytes: &[u8], flags: c_int) -> Bytes {
+/// Compress a buffer without writing any sort of header on the output. Fast
+/// compression is used because it is almost twice as fast as default
+/// compression and the compression ratio is only marginally worse.
+pub fn deflate_bytes(bytes: &[u8]) -> Bytes {
+    let flags = LZ_FAST | TDEFL_GREEDY_PARSING_FLAG;
     unsafe {
         let mut outsz: size_t = 0;
         let res = tdefl_compress_mem_to_heap(bytes.as_ptr() as *const _,
@@ -113,17 +116,9 @@ fn deflate_bytes_internal(bytes: &[u8], flags: c_int) -> Bytes {
     }
 }
 
-/// Compress a buffer, without writing any sort of header on the output.
-pub fn deflate_bytes(bytes: &[u8]) -> Bytes {
-    deflate_bytes_internal(bytes, LZ_NORM)
-}
-
-/// Compress a buffer, using a header that zlib can understand.
-pub fn deflate_bytes_zlib(bytes: &[u8]) -> Bytes {
-    deflate_bytes_internal(bytes, LZ_NORM | TDEFL_WRITE_ZLIB_HEADER)
-}
-
-fn inflate_bytes_internal(bytes: &[u8], flags: c_int) -> Result<Bytes, Error> {
+/// Decompress a buffer without parsing any sort of header on the input.
+pub fn inflate_bytes(bytes: &[u8]) -> Result<Bytes, Error> {
+    let flags = 0;
     unsafe {
         let mut outsz: size_t = 0;
         let res = tinfl_decompress_mem_to_heap(bytes.as_ptr() as *const _,
@@ -141,16 +136,6 @@ fn inflate_bytes_internal(bytes: &[u8], flags: c_int) -> Result<Bytes, Error> {
     }
 }
 
-/// Decompress a buffer, without parsing any sort of header on the input.
-pub fn inflate_bytes(bytes: &[u8]) -> Result<Bytes, Error> {
-    inflate_bytes_internal(bytes, 0)
-}
-
-/// Decompress a buffer that starts with a zlib header.
-pub fn inflate_bytes_zlib(bytes: &[u8]) -> Result<Bytes, Error> {
-    inflate_bytes_internal(bytes, TINFL_FLAG_PARSE_ZLIB_HEADER)
-}
-
 #[cfg(test)]
 mod tests {
     #![allow(deprecated)]