about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2014-10-29 18:17:00 +0000
committer bors <bors@rust-lang.org> 2014-10-29 18:17:00 +0000
commit 4769bca1483839ff9c0ad8353c206d6ee06c50e1 (patch)
tree 434feb68c66c918723602aca1f7c3706faee9907
parent dd7113609c17cc31fee4b54d954e71e01af3bcc9 (diff)
parent f21977318112a1b7491bbf462dfefbd8ec14743e (diff)
download rust-4769bca1483839ff9c0ad8353c206d6ee06c50e1.tar.gz
download rust-4769bca1483839ff9c0ad8353c206d6ee06c50e1.zip
auto merge of #18282 : pczarn/rust/regex-parse, r=burntsushi
Fixes #18034

3 bugs fixed.
-rw-r--r-- src/libregex/parse.rs 51
-rw-r--r-- src/libregex/test/tests.rs 28
2 files changed, 55 insertions, 24 deletions
diff --git a/src/libregex/parse.rs b/src/libregex/parse.rs
index 1d1d1a0e9c5..35583be372c 100644
--- a/src/libregex/parse.rs
+++ b/src/libregex/parse.rs
@@ -374,16 +374,12 @@ impl<'a> Parser<'a> {
         let mut ranges: Vec<(char, char)> = vec!();
         let mut alts: Vec<Ast> = vec!();
 
-        if self.peek_is(1, ']') {
-            try!(self.expect(']'))
-            ranges.push((']', ']'))
-        }
         while self.peek_is(1, '-') {
-            try!(self.expect('-'))
+            try!(self.expect('-'));
             ranges.push(('-', '-'))
         }
         loop {
-            try!(self.noteof("a closing ']' or a non-empty character class)"))
+            try!(self.noteof("a closing ']' or a non-empty character class)"));
             let mut c = self.cur();
             match c {
                 '[' =>
@@ -411,10 +407,7 @@ impl<'a> Parser<'a> {
                         ast => fail!("Unexpected AST item '{}'", ast),
                     }
                 }
-                _ => {},
-            }
-            match c {
-                ']' => {
+                ']' if ranges.len() > 0 || alts.len() > 0 => {
                     if ranges.len() > 0 {
                         let flags = negated | (self.flags & FLAG_NOCASE);
                         let mut ast = AstClass(combine_ranges(ranges), flags);
@@ -431,22 +424,32 @@ impl<'a> Parser<'a> {
                     }
                     return Ok(())
                 }
-                c => {
-                    if self.peek_is(1, '-') && !self.peek_is(2, ']') {
-                        try!(self.expect('-'))
-                        try!(self.noteof("not a ']'"))
-                        let c2 = self.cur();
-                        if c2 < c {
-                            return self.err(format!("Invalid character class \
-                                                     range '{}-{}'",
-                                                    c,
-                                                    c2).as_slice())
-                        }
-                        ranges.push((c, self.cur()))
-                    } else {
-                        ranges.push((c, c))
+                _ => {}
+            }
+
+            if self.peek_is(1, '-') && !self.peek_is(2, ']') {
+                try!(self.expect('-'));
+                // The regex can't end here.
+                try!(self.noteof("not a ']'"));
+                // End the range with a single character or character escape.
+                let mut c2 = self.cur();
+                if c2 == '\\' {
+                    match try!(self.parse_escape()) {
+                        Literal(c3, _) => c2 = c3, // allow literal escapes below
+                        ast =>
+                            return self.err(format!("Expected a literal, but got {}.",
+                                                    ast).as_slice()),
                     }
                 }
+                if c2 < c {
+                    return self.err(format!("Invalid character class \
+                                             range '{}-{}'",
+                                            c,
+                                            c2).as_slice())
+                }
+                ranges.push((c, self.cur()))
+            } else {
+                ranges.push((c, c))
             }
         }
     }
diff --git a/src/libregex/test/tests.rs b/src/libregex/test/tests.rs
index 088425c0888..06f7db27418 100644
--- a/src/libregex/test/tests.rs
+++ b/src/libregex/test/tests.rs
@@ -43,6 +43,30 @@ fn empty_regex_nonempty_match() {
     assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
 }
 
+#[test]
+fn quoted_bracket_set() {
+    let re = regex!(r"([\x{5b}\x{5d}])");
+    let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
+    assert_eq!(ms, vec![(0, 1), (1, 2)]);
+    let re = regex!(r"([\[\]])");
+    let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
+    assert_eq!(ms, vec![(0, 1), (1, 2)]);
+}
+
+#[test]
+fn first_range_starts_with_left_bracket() {
+    let re = regex!(r"([[-z])");
+    let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
+    assert_eq!(ms, vec![(0, 1), (1, 2)]);
+}
+
+#[test]
+fn range_ends_with_escape() {
+    let re = regex!(r"([\[-\x{5d}])");
+    let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
+    assert_eq!(ms, vec![(0, 1), (1, 2)]);
+}
+
 macro_rules! replace(
     ($name:ident, $which:ident, $re:expr,
      $search:expr, $replace:expr, $result:expr) => (
@@ -114,6 +138,10 @@ noparse!(fail_double_neg, "(?-i-i)")
 noparse!(fail_neg_empty, "(?i-)")
 noparse!(fail_empty_group, "()")
 noparse!(fail_dupe_named, "(?P<a>.)(?P<a>.)")
+noparse!(fail_range_end_no_class, "[a-[:lower:]]")
+noparse!(fail_range_end_no_begin, r"[a-\A]")
+noparse!(fail_range_end_no_end, r"[a-\z]")
+noparse!(fail_range_end_no_boundary, r"[a-\b]")
 
 macro_rules! mat(
     ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (