diff options
Diffstat (limited to 'src/libregex/test/bench.rs')
| -rw-r--r-- | src/libregex/test/bench.rs | 179 | 
1 files changed, 179 insertions, 0 deletions
| diff --git a/src/libregex/test/bench.rs b/src/libregex/test/bench.rs new file mode 100644 index 00000000000..a5667ab088e --- /dev/null +++ b/src/libregex/test/bench.rs @@ -0,0 +1,179 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use rand::{Rng, task_rng}; +use stdtest::Bencher; +use std::str; +use regex::{Regex, NoExpand}; + +fn bench_assert_match(b: &mut Bencher, re: Regex, text: &str) { + b.iter(|| if !re.is_match(text) { fail!("no match") }); +} + +#[bench] +fn no_exponential(b: &mut Bencher) { + let n = 100; + let re = Regex::new("a?".repeat(n) + "a".repeat(n)).unwrap(); + let text = "a".repeat(n); + bench_assert_match(b, re, text); +} + +#[bench] +fn literal(b: &mut Bencher) { + let re = regex!("y"); + let text = "x".repeat(50) + "y"; + bench_assert_match(b, re, text); +} + +#[bench] +fn not_literal(b: &mut Bencher) { + let re = regex!(".y"); + let text = "x".repeat(50) + "y"; + bench_assert_match(b, re, text); +} + +#[bench] +fn match_class(b: &mut Bencher) { + let re = regex!("[abcdw]"); + let text = "xxxx".repeat(20) + "w"; + bench_assert_match(b, re, text); +} + +#[bench] +fn match_class_in_range(b: &mut Bencher) { + // 'b' is between 'a' and 'c', so the class range checking doesn't help. + let re = regex!("[ac]"); + let text = "bbbb".repeat(20) + "c"; + bench_assert_match(b, re, text); +} + +#[bench] +fn replace_all(b: &mut Bencher) { + let re = regex!("[cjrw]"); + let text = "abcdefghijklmnopqrstuvwxyz"; + // FIXME: This isn't using the $name expand stuff. + // It's possible RE2/Go is using it, but currently, the expand in this + // crate is actually compiling a regex, so it's incredibly slow. + b.iter(|| re.replace_all(text, NoExpand(""))); +} + +#[bench] +fn anchored_literal_short_non_match(b: &mut Bencher) { + let re = regex!("^zbc(d|e)"); + let text = "abcdefghijklmnopqrstuvwxyz"; + b.iter(|| re.is_match(text)); +} + +#[bench] +fn anchored_literal_long_non_match(b: &mut Bencher) { + let re = regex!("^zbc(d|e)"); + let text = "abcdefghijklmnopqrstuvwxyz".repeat(15); + b.iter(|| re.is_match(text)); +} + +#[bench] +fn anchored_literal_short_match(b: &mut Bencher) { + let re = regex!("^.bc(d|e)"); + let text = "abcdefghijklmnopqrstuvwxyz"; + b.iter(|| re.is_match(text)); +} + +#[bench] +fn anchored_literal_long_match(b: &mut Bencher) { + let re = regex!("^.bc(d|e)"); + let text = "abcdefghijklmnopqrstuvwxyz".repeat(15); + b.iter(|| re.is_match(text)); +} + +#[bench] +fn one_pass_short_a(b: &mut Bencher) { + let re = regex!("^.bc(d|e)*$"); + let text = "abcddddddeeeededd"; + b.iter(|| re.is_match(text)); +} + +#[bench] +fn one_pass_short_a_not(b: &mut Bencher) { + let re = regex!(".bc(d|e)*$"); + let text = "abcddddddeeeededd"; + b.iter(|| re.is_match(text)); +} + +#[bench] +fn one_pass_short_b(b: &mut Bencher) { + let re = regex!("^.bc(?:d|e)*$"); + let text = "abcddddddeeeededd"; + b.iter(|| re.is_match(text)); +} + +#[bench] +fn one_pass_short_b_not(b: &mut Bencher) { + let re = regex!(".bc(?:d|e)*$"); + let text = "abcddddddeeeededd"; + b.iter(|| re.is_match(text)); +} + +#[bench] +fn one_pass_long_prefix(b: &mut Bencher) { + let re = regex!("^abcdefghijklmnopqrstuvwxyz.*$"); + let text = "abcdefghijklmnopqrstuvwxyz"; + b.iter(|| re.is_match(text)); +} + +#[bench] +fn one_pass_long_prefix_not(b: &mut Bencher) { + let re = regex!("^.bcdefghijklmnopqrstuvwxyz.*$"); + let text = "abcdefghijklmnopqrstuvwxyz"; + b.iter(|| re.is_match(text)); +} + +macro_rules! throughput( + ($name:ident, $regex:expr, $size:expr) => ( + #[bench] + fn $name(b: &mut Bencher) { + let text = gen_text($size); + b.bytes = $size; + b.iter(|| if $regex.is_match(text) { fail!("match") }); + } + ); +) + +fn easy0() -> Regex { regex!("ABCDEFGHIJKLMNOPQRSTUVWXYZ$") } +fn easy1() -> Regex { regex!("A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$") } +fn medium() -> Regex { regex!("[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$") } +fn hard() -> Regex { regex!("[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$") } + +fn gen_text(n: uint) -> ~str { + let mut rng = task_rng(); + let mut bytes = rng.gen_ascii_str(n).into_bytes(); + for (i, b) in bytes.mut_iter().enumerate() { + if i % 20 == 0 { + *b = '\n' as u8 + } + } + str::from_utf8(bytes).unwrap().to_owned() +} + +throughput!(easy0_32, easy0(), 32) +throughput!(easy0_1K, easy0(), 1<<10) +throughput!(easy0_32K, easy0(), 32<<10) + +throughput!(easy1_32, easy1(), 32) +throughput!(easy1_1K, easy1(), 1<<10) +throughput!(easy1_32K, easy1(), 32<<10) + +throughput!(medium_32, medium(), 32) +throughput!(medium_1K, medium(), 1<<10) +throughput!(medium_32K,medium(), 32<<10) + +throughput!(hard_32, hard(), 32) +throughput!(hard_1K, hard(), 1<<10) +throughput!(hard_32K,hard(), 32<<10) + | 
