diff options
| author | bors <bors@rust-lang.org> | 2014-04-24 23:41:15 -0700 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2014-04-24 23:41:15 -0700 |
| commit | eea4909a8713a54b3c47e871a70baf6c722999a3 (patch) | |
| tree | b01796a9342a4763e0701712c72f8fe22672c789 /src/etc/regex-match-tests.py | |
| parent | 2bb2341a4af75fb54b809a8e1d5aacbca4df56fc (diff) | |
| parent | 7269bc77e1d1f1babfd159db97024cbd535c47a7 (diff) | |
| download | rust-eea4909a8713a54b3c47e871a70baf6c722999a3.tar.gz rust-eea4909a8713a54b3c47e871a70baf6c722999a3.zip | |
auto merge of #13700 : BurntSushi/rust/regexp, r=alexcrichton
Implements [RFC 7](https://github.com/rust-lang/rfcs/blob/master/active/0007-regexps.md) and will hopefully resolve #3591. The crate is marked as experimental. It includes a syntax extension for compiling regexps to native Rust code. Embeds and passes the `basic`, `nullsubexpr` and `repetition` tests from [Glenn Fowler's (slightly modified by Russ Cox for leftmost-first semantics) testregex test suite](http://www2.research.att.com/~astopen/testregex/testregex.html). I've also hand-written a plethora of other tests that exercise Unicode support, the parser, the public API, etc. Also includes a `regex-dna` benchmark for the shootout. I know the addition looks huge at first, but consider these things: 1. More than half of the lines are dedicated to Unicode character classes. 2. Of the ~4,500 lines remaining, 1,225 are comments. 3. Another ~800 are tests. 4. That leaves 2500 lines for the meat. The parser is ~850 of them. The public API, compiler, dynamic VM and code generator (for `regexp!`) make up the rest.
Diffstat (limited to 'src/etc/regex-match-tests.py')
| -rwxr-xr-x | src/etc/regex-match-tests.py | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/src/etc/regex-match-tests.py b/src/etc/regex-match-tests.py new file mode 100755 index 00000000000..826af961fce --- /dev/null +++ b/src/etc/regex-match-tests.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python2 + +# Copyright 2014 The Rust Project Developers. See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. + +from __future__ import absolute_import, division, print_function +import argparse +import datetime +import os.path as path + + +def print_tests(tests): + print('\n'.join([test_tostr(t) for t in tests])) + + +def read_tests(f): + basename, _ = path.splitext(path.basename(f)) + tests = [] + for lineno, line in enumerate(open(f), 1): + fields = filter(None, map(str.strip, line.split('\t'))) + if not (4 <= len(fields) <= 5) \ + or 'E' not in fields[0] or fields[0][0] == '#': + continue + + opts, pat, text, sgroups = fields[0:4] + groups = [] # groups as integer ranges + if sgroups == 'NOMATCH': + groups = [None] + elif ',' in sgroups: + noparen = map(lambda s: s.strip('()'), sgroups.split(')(')) + for g in noparen: + s, e = map(str.strip, g.split(',')) + if s == '?' and e == '?': + groups.append(None) + else: + groups.append((int(s), int(e))) + else: + # This skips tests that should result in an error. + # There aren't many, so I think we can just capture those + # manually. Possibly fix this in future. 
+ continue + + if pat == 'SAME': + pat = tests[-1][1] + if '$' in opts: + pat = pat.decode('string_escape') + text = text.decode('string_escape') + if 'i' in opts: + pat = '(?i)%s' % pat + + name = '%s_%d' % (basename, lineno) + tests.append((name, pat, text, groups)) + return tests + + +def test_tostr(t): + lineno, pat, text, groups = t + options = map(group_tostr, groups) + return 'mat!(match_%s, r"%s", r"%s", %s)' \ + % (lineno, pat, '' if text == "NULL" else text, ', '.join(options)) + + +def group_tostr(g): + if g is None: + return 'None' + else: + return 'Some((%d, %d))' % (g[0], g[1]) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Generate match tests from an AT&T POSIX test file.') + aa = parser.add_argument + aa('files', nargs='+', + help='A list of dat AT&T POSIX test files. See src/libregexp/testdata') + args = parser.parse_args() + + tests = [] + for f in args.files: + tests += read_tests(f) + + tpl = '''// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// ignore-tidy-linelength + +// DO NOT EDIT. Automatically generated by 'src/etc/regexp-match-tests' +// on {date}. +''' + print(tpl.format(date=str(datetime.datetime.now()))) + + for f in args.files: + print('// Tests from %s' % path.basename(f)) + print_tests(read_tests(f)) + print('') |
