syntax: Tighten search paths for inner modules

This is an implementation of RFC 16. A module can now only be loaded if the module declaring `mod name;` "owns" the current directory. A module is considered as owning its directory if it meets one of the following criteria: * It is the top-level crate file * It is a `mod.rs` file * It was loaded via `#[path]` * It was loaded via `include!` * The module was declared via an inline `mod foo { ... }` statement For example, this directory structure is now invalid // lib.rs mod foo; // foo.rs mod bar; // bar.rs; fn bar() {} With this change `foo.rs` must be renamed to `foo/mod.rs`, and `bar.rs` must be renamed to `foo/bar.rs`. This makes it clear that `bar` is a submodule of `foo`, and can only be accessed through `foo`. RFC: 0016-module-file-system-hierarchy Closes #14180 [breaking-change]
author: Alex Crichton <alex@alexcrichton.com> 2014-05-16 14:23:04 -0700
committer: Alex Crichton <alex@alexcrichton.com> 2014-05-17 01:01:47 -0700
commit: 4e9e091e91ea2ad8a6f45a9b20ff331d4bca7a23 (patch)
tree: c01cb3c61732225abd28d5eb8991537ca7ad30b5 /src/libregex/parse.rs
parent: 25c54226c3e7dd6f59cf2e92238a4d79d8b0128d (diff)
download: rust-4e9e091e91ea2ad8a6f45a9b20ff331d4bca7a23.tar.gz
rust-4e9e091e91ea2ad8a6f45a9b20ff331d4bca7a23.zip
1 files changed, 0 insertions, 1030 deletions
diff --git a/src/libregex/parse.rs b/src/libregex/parse.rs
deleted file mode 100644
index a695da9fa16..00000000000
--- a/src/libregex/parse.rs
+++ /dev/null
@@ -1,1030 +0,0 @@
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-use std::char;
-use std::cmp;
-use std::fmt;
-use std::iter;
-use std::num;
-use std::str;
-
-/// Static data containing Unicode ranges for general categories and scripts.
-use self::unicode::{UNICODE_CLASSES, PERLD, PERLS, PERLW};
-#[allow(visible_private_types)]
-pub mod unicode;
-
-/// The maximum number of repetitions allowed with the `{n,m}` syntax.
-static MAX_REPEAT: uint = 1000;
-
-/// Error corresponds to something that can go wrong while parsing
-/// a regular expression.
-///
-/// (Once an expression is compiled, it is not possible to produce an error
-/// via searching, splitting or replacing.)
-pub struct Error {
-    /// The *approximate* character index of where the error occurred.
-    pub pos: uint,
-    /// A message describing the error.
-    pub msg: StrBuf,
-}
-
-impl fmt::Show for Error {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "Regex syntax error near position {}: {}",
-               self.pos, self.msg)
-    }
-}
-
-/// Represents the abstract syntax of a regular expression.
-/// It is showable so that error messages resulting from a bug can provide
-/// useful information.
-/// It is cloneable so that expressions can be repeated for the counted
-/// repetition feature. (No other copying is done.)
-///
-/// Note that this representation prevents one from reproducing the regex as
-/// it was typed. (But it could be used to reproduce an equivalent regex.)
-#[deriving(Show, Clone)]
-pub enum Ast {
-    Nothing,
-    Literal(char, Flags),
-    Dot(Flags),
-    Class(Vec<(char, char)>, Flags),
-    Begin(Flags),
-    End(Flags),
-    WordBoundary(Flags),
-    Capture(uint, Option<StrBuf>, Box<Ast>),
-    // Represent concatenation as a flat vector to avoid blowing the
-    // stack in the compiler.
-    Cat(Vec<Ast>),
-    Alt(Box<Ast>, Box<Ast>),
-    Rep(Box<Ast>, Repeater, Greed),
-}
-
-#[deriving(Show, Eq, Clone)]
-pub enum Repeater {
-    ZeroOne,
-    ZeroMore,
-    OneMore,
-}
-
-#[deriving(Show, Clone)]
-pub enum Greed {
-    Greedy,
-    Ungreedy,
-}
-
-impl Greed {
-    pub fn is_greedy(&self) -> bool {
-        match *self {
-            Greedy => true,
-            _ => false,
-        }
-    }
-
-    fn swap(self, swapped: bool) -> Greed {
-        if !swapped { return self }
-        match self {
-            Greedy => Ungreedy,
-            Ungreedy => Greedy,
-        }
-    }
-}
-
-/// BuildAst is a regrettable type that represents intermediate state for
-/// constructing an abstract syntax tree. Its central purpose is to facilitate
-/// parsing groups and alternations while also maintaining a stack of flag
-/// state.
-#[deriving(Show)]
-enum BuildAst {
-    Ast(Ast),
-    Paren(Flags, uint, StrBuf), // '('
-    Bar, // '|'
-}
-
-impl BuildAst {
-    fn paren(&self) -> bool {
-        match *self {
-            Paren(_, _, _) => true,
-            _ => false,
-        }
-    }
-
-    fn flags(&self) -> Flags {
-        match *self {
-            Paren(flags, _, _) => flags,
-            _ => fail!("Cannot get flags from {}", self),
-        }
-    }
-
-    fn capture(&self) -> Option<uint> {
-        match *self {
-            Paren(_, 0, _) => None,
-            Paren(_, c, _) => Some(c),
-            _ => fail!("Cannot get capture group from {}", self),
-        }
-    }
-
-    fn capture_name(&self) -> Option<StrBuf> {
-        match *self {
-            Paren(_, 0, _) => None,
-            Paren(_, _, ref name) => {
-                if name.len() == 0 {
-                    None
-                } else {
-                    Some(name.clone())
-                }
-            }
-            _ => fail!("Cannot get capture name from {}", self),
-        }
-    }
-
-    fn bar(&self) -> bool {
-        match *self {
-            Bar => true,
-            _ => false,
-        }
-    }
-
-    fn unwrap(self) -> Result<Ast, Error> {
-        match self {
-            Ast(x) => Ok(x),
-            _ => fail!("Tried to unwrap non-AST item: {}", self),
-        }
-    }
-}
-
-/// Flags represents all options that can be twiddled by a user in an
-/// expression.
-pub type Flags = u8;
-
-pub static FLAG_EMPTY:      u8 = 0;
-pub static FLAG_NOCASE:     u8 = 1 << 0; // i
-pub static FLAG_MULTI:      u8 = 1 << 1; // m
-pub static FLAG_DOTNL:      u8 = 1 << 2; // s
-pub static FLAG_SWAP_GREED: u8 = 1 << 3; // U
-pub static FLAG_NEGATED:    u8 = 1 << 4; // char class or not word boundary
-
-struct Parser<'a> {
-    // The input, parsed only as a sequence of UTF8 code points.
-    chars: Vec<char>,
-    // The index of the current character in the input.
-    chari: uint,
-    // The intermediate state representing the AST.
-    stack: Vec<BuildAst>,
-    // The current set of flags.
-    flags: Flags,
-    // The total number of capture groups.
-    // Incremented each time an opening left paren is seen (assuming it is
-    // opening a capture group).
-    caps: uint,
-    // A set of all capture group names used only to detect duplicates.
-    names: Vec<StrBuf>,
-}
-
-pub fn parse(s: &str) -> Result<Ast, Error> {
-    Parser {
-        chars: s.chars().collect(),
-        chari: 0,
-        stack: vec!(),
-        flags: FLAG_EMPTY,
-        caps: 0,
-        names: vec!(),
-    }.parse()
-}
-
-impl<'a> Parser<'a> {
-    fn parse(&mut self) -> Result<Ast, Error> {
-        loop {
-            let c = self.cur();
-            match c {
-                '?' | '*' | '+' => try!(self.push_repeater(c)),
-                '\\' => {
-                    let ast = try!(self.parse_escape());
-                    self.push(ast)
-                }
-                '{' => try!(self.parse_counted()),
-                '[' => match self.try_parse_ascii() {
-                    None => try!(self.parse_class()),
-                    Some(class) => self.push(class),
-                },
-                '(' => {
-                    if self.peek_is(1, '?') {
-                        try!(self.expect('?'))
-                        try!(self.parse_group_opts())
-                    } else {
-                        self.caps += 1;
-                        self.stack.push(Paren(self.flags,
-                                              self.caps,
-                                              "".to_strbuf()))
-                    }
-                }
-                ')' => {
-                    let catfrom = try!(
-                        self.pos_last(false, |x| x.paren() || x.bar()));
-                    try!(self.concat(catfrom));
-
-                    let altfrom = try!(self.pos_last(false, |x| x.paren()));
-                    // Before we smush the alternates together and pop off the
-                    // left paren, let's grab the old flags and see if we
-                    // need a capture.
-                    let (cap, cap_name, oldflags) = {
-                        let paren = self.stack.get(altfrom-1);
-                        (paren.capture(), paren.capture_name(), paren.flags())
-                    };
-                    try!(self.alternate(altfrom));
-                    self.flags = oldflags;
-
-                    // If this was a capture, pop what we just pushed in
-                    // alternate and make it a capture.
-                    if cap.is_some() {
-                        let ast = try!(self.pop_ast());
-                        self.push(Capture(cap.unwrap(), cap_name, box ast));
-                    }
-                }
-                '|' => {
-                    let catfrom = try!(
-                        self.pos_last(true, |x| x.paren() || x.bar()));
-                    try!(self.concat(catfrom));
-
-                    self.stack.push(Bar);
-                }
-                _ => try!(self.push_literal(c)),
-            }
-            if !self.next_char() {
-                break
-            }
-        }
-
-        // Try to improve error handling. At this point, there should be
-        // no remaining open parens.
-        if self.stack.iter().any(|x| x.paren()) {
-            return self.err("Unclosed parenthesis.")
-        }
-        let catfrom = try!(self.pos_last(true, |x| x.bar()));
-        try!(self.concat(catfrom));
-        try!(self.alternate(0));
-
-        assert!(self.stack.len() == 1);
-        self.pop_ast()
-    }
-
-    fn noteof(&mut self, expected: &str) -> Result<(), Error> {
-        match self.next_char() {
-            true => Ok(()),
-            false => self.err(format!("Expected {} but got EOF.", expected)),
-        }
-    }
-
-    fn expect(&mut self, expected: char) -> Result<(), Error> {
-        match self.next_char() {
-            true if self.cur() == expected => Ok(()),
-            true => self.err(format!("Expected '{}' but got '{}'.",
-                                     expected, self.cur())),
-            false => self.err(format!("Expected '{}' but got EOF.", expected)),
-        }
-    }
-
-    fn next_char(&mut self) -> bool {
-        self.chari += 1;
-        self.chari < self.chars.len()
-    }
-
-    fn pop_ast(&mut self) -> Result<Ast, Error> {
-        match self.stack.pop().unwrap().unwrap() {
-            Err(e) => Err(e),
-            Ok(ast) => Ok(ast),
-        }
-    }
-
-    fn push(&mut self, ast: Ast) {
-        self.stack.push(Ast(ast))
-    }
-
-    fn push_repeater(&mut self, c: char) -> Result<(), Error> {
-        if self.stack.len() == 0 {
-            return self.err(
-                "A repeat operator must be preceded by a valid expression.")
-        }
-        let rep: Repeater = match c {
-            '?' => ZeroOne, '*' => ZeroMore, '+' => OneMore,
-            _ => fail!("Not a valid repeater operator."),
-        };
-
-        match self.peek(1) {
-            Some('*') | Some('+') =>
-                return self.err(
-                    "Double repeat operators are not supported."),
-            _ => {},
-        }
-        let ast = try!(self.pop_ast());
-        match ast {
-            Begin(_) | End(_) | WordBoundary(_) =>
-                return self.err(
-                    "Repeat arguments cannot be empty width assertions."),
-            _ => {}
-        }
-        let greed = try!(self.get_next_greedy());
-        self.push(Rep(box ast, rep, greed));
-        Ok(())
-    }
-
-    fn push_literal(&mut self, c: char) -> Result<(), Error> {
-        match c {
-            '.' => {
-                self.push(Dot(self.flags))
-            }
-            '^' => {
-                self.push(Begin(self.flags))
-            }
-            '$' => {
-                self.push(End(self.flags))
-            }
-            _ => {
-                self.push(Literal(c, self.flags))
-            }
-        }
-        Ok(())
-    }
-
-    // Parses all forms of character classes.
-    // Assumes that '[' is the current character.
-    fn parse_class(&mut self) -> Result<(), Error> {
-        let negated =
-            if self.peek_is(1, '^') {
-                try!(self.expect('^'))
-                FLAG_NEGATED
-            } else {
-                FLAG_EMPTY
-            };
-        let mut ranges: Vec<(char, char)> = vec!();
-        let mut alts: Vec<Ast> = vec!();
-
-        if self.peek_is(1, ']') {
-            try!(self.expect(']'))
-            ranges.push((']', ']'))
-        }
-        while self.peek_is(1, '-') {
-            try!(self.expect('-'))
-            ranges.push(('-', '-'))
-        }
-        loop {
-            try!(self.noteof("a closing ']' or a non-empty character class)"))
-            let mut c = self.cur();
-            match c {
-                '[' =>
-                    match self.try_parse_ascii() {
-                        Some(Class(asciis, flags)) => {
-                            alts.push(Class(asciis, flags ^ negated));
-                            continue
-                        }
-                        Some(ast) =>
-                            fail!("Expected Class AST but got '{}'", ast),
-                        // Just drop down and try to add as a regular character.
-                        None => {},
-                    },
-                '\\' => {
-                    match try!(self.parse_escape()) {
-                        Class(asciis, flags) => {
-                            alts.push(Class(asciis, flags ^ negated));
-                            continue
-                        }
-                        Literal(c2, _) => c = c2, // process below
-                        Begin(_) | End(_) | WordBoundary(_) =>
-                            return self.err(
-                                "\\A, \\z, \\b and \\B are not valid escape \
-                                 sequences inside a character class."),
-                        ast => fail!("Unexpected AST item '{}'", ast),
-                    }
-                }
-                _ => {},
-            }
-            match c {
-                ']' => {
-                    if ranges.len() > 0 {
-                        let flags = negated | (self.flags & FLAG_NOCASE);
-                        let mut ast = Class(combine_ranges(ranges), flags);
-                        for alt in alts.move_iter() {
-                            ast = Alt(box alt, box ast)
-                        }
-                        self.push(ast);
-                    } else if alts.len() > 0 {
-                        let mut ast = alts.pop().unwrap();
-                        for alt in alts.move_iter() {
-                            ast = Alt(box alt, box ast)
-                        }
-                        self.push(ast);
-                    }
-                    return Ok(())
-                }
-                c => {
-                    if self.peek_is(1, '-') && !self.peek_is(2, ']') {
-                        try!(self.expect('-'))
-                        try!(self.noteof("not a ']'"))
-                        let c2 = self.cur();
-                        if c2 < c {
-                            return self.err(format!(
-                                "Invalid character class range '{}-{}'", c, c2))
-                        }
-                        ranges.push((c, self.cur()))
-                    } else {
-                        ranges.push((c, c))
-                    }
-                }
-            }
-        }
-    }
-
-    // Tries to parse an ASCII character class of the form [:name:].
-    // If successful, returns an AST character class corresponding to name
-    // and moves the parser to the final ']' character.
-    // If unsuccessful, no state is changed and None is returned.
-    // Assumes that '[' is the current character.
-    fn try_parse_ascii(&mut self) -> Option<Ast> {
-        if !self.peek_is(1, ':') {
-            return None
-        }
-        let closer =
-            match self.pos(']') {
-                Some(i) => i,
-                None => return None,
-            };
-        if *self.chars.get(closer-1) != ':' {
-            return None
-        }
-        if closer - self.chari <= 3 {
-            return None
-        }
-        let mut name_start = self.chari + 2;
-        let negated =
-            if self.peek_is(2, '^') {
-                name_start += 1;
-                FLAG_NEGATED
-            } else {
-                FLAG_EMPTY
-            };
-        let name = self.slice(name_start, closer - 1);
-        match find_class(ASCII_CLASSES, name.as_slice()) {
-            None => None,
-            Some(ranges) => {
-                self.chari = closer;
-                let flags = negated | (self.flags & FLAG_NOCASE);
-                Some(Class(combine_ranges(ranges), flags))
-            }
-        }
-    }
-
-    // Parses counted repetition. Supports:
-    // {n}, {n,}, {n,m}, {n}?, {n,}? and {n,m}?
-    // Assumes that '{' is the current character.
-    // Returns either an error or moves the parser to the final '}' character.
-    // (Or the '?' character if not greedy.)
-    fn parse_counted(&mut self) -> Result<(), Error> {
-        // Scan until the closing '}' and grab the stuff in {}.
-        let start = self.chari;
-        let closer =
-            match self.pos('}') {
-                Some(i) => i,
-                None => return self.err(format!(
-                    "No closing brace for counted repetition starting at \
-                     position {}.", start)),
-            };
-        self.chari = closer;
-        let greed = try!(self.get_next_greedy());
-        let inner = str::from_chars(
-            self.chars.as_slice().slice(start + 1, closer));
-
-        // Parse the min and max values from the regex.
-        let (mut min, mut max): (uint, Option<uint>);
-        if !inner.contains(",") {
-            min = try!(self.parse_uint(inner));
-            max = Some(min);
-        } else {
-            let pieces: Vec<&str> = inner.splitn(',', 1).collect();
-            let (smin, smax) = (*pieces.get(0), *pieces.get(1));
-            if smin.len() == 0 {
-                return self.err("Max repetitions cannot be specified \
-                                    without min repetitions.")
-            }
-            min = try!(self.parse_uint(smin));
-            max =
-                if smax.len() == 0 {
-                    None
-                } else {
-                    Some(try!(self.parse_uint(smax)))
-                };
-        }
-
-        // Do some bounds checking and make sure max >= min.
-        if min > MAX_REPEAT {
-            return self.err(format!(
-                "{} exceeds maximum allowed repetitions ({})",
-                min, MAX_REPEAT));
-        }
-        if max.is_some() {
-            let m = max.unwrap();
-            if m > MAX_REPEAT {
-                return self.err(format!(
-                    "{} exceeds maximum allowed repetitions ({})",
-                    m, MAX_REPEAT));
-            }
-            if m < min {
-                return self.err(format!(
-                    "Max repetitions ({}) cannot be smaller than min \
-                     repetitions ({}).", m, min));
-            }
-        }
-
-        // Now manipulate the AST be repeating elements.
-        if max.is_none() {
-            // Require N copies of what's on the stack and then repeat it.
-            let ast = try!(self.pop_ast());
-            for _ in iter::range(0, min) {
-                self.push(ast.clone())
-            }
-            self.push(Rep(box ast, ZeroMore, greed));
-        } else {
-            // Require N copies of what's on the stack and then repeat it
-            // up to M times optionally.
-            let ast = try!(self.pop_ast());
-            for _ in iter::range(0, min) {
-                self.push(ast.clone())
-            }
-            if max.is_some() {
-                for _ in iter::range(min, max.unwrap()) {
-                    self.push(Rep(box ast.clone(), ZeroOne, greed))
-                }
-            }
-            // It's possible that we popped something off the stack but
-            // never put anything back on it. To keep things simple, add
-            // a no-op expression.
-            if min == 0 && (max.is_none() || max == Some(0)) {
-                self.push(Nothing)
-            }
-        }
-        Ok(())
-    }
-
-    // Parses all escape sequences.
-    // Assumes that '\' is the current character.
-    fn parse_escape(&mut self) -> Result<Ast, Error> {
-        try!(self.noteof("an escape sequence following a '\\'"))
-
-        let c = self.cur();
-        if is_punct(c) {
-            return Ok(Literal(c, FLAG_EMPTY))
-        }
-        match c {
-            'a' => Ok(Literal('\x07', FLAG_EMPTY)),
-            'f' => Ok(Literal('\x0C', FLAG_EMPTY)),
-            't' => Ok(Literal('\t', FLAG_EMPTY)),
-            'n' => Ok(Literal('\n', FLAG_EMPTY)),
-            'r' => Ok(Literal('\r', FLAG_EMPTY)),
-            'v' => Ok(Literal('\x0B', FLAG_EMPTY)),
-            'A' => Ok(Begin(FLAG_EMPTY)),
-            'z' => Ok(End(FLAG_EMPTY)),
-            'b' => Ok(WordBoundary(FLAG_EMPTY)),
-            'B' => Ok(WordBoundary(FLAG_NEGATED)),
-            '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7' => Ok(try!(self.parse_octal())),
-            'x' => Ok(try!(self.parse_hex())),
-            'p' | 'P' => Ok(try!(self.parse_unicode_name())),
-            'd' | 'D' | 's' | 'S' | 'w' | 'W' => {
-                let ranges = perl_unicode_class(c);
-                let mut flags = self.flags & FLAG_NOCASE;
-                if c.is_uppercase() { flags |= FLAG_NEGATED }
-                Ok(Class(ranges, flags))
-            }
-            _ => self.err(format!("Invalid escape sequence '\\\\{}'", c)),
-        }
-    }
-
-    // Parses a unicode character class name, either of the form \pF where
-    // F is a one letter unicode class name or of the form \p{name} where
-    // name is the unicode class name.
-    // Assumes that \p or \P has been read (and 'p' or 'P' is the current
-    // character).
-    fn parse_unicode_name(&mut self) -> Result<Ast, Error> {
-        let negated = if self.cur() == 'P' { FLAG_NEGATED } else { FLAG_EMPTY };
-        let mut name: StrBuf;
-        if self.peek_is(1, '{') {
-            try!(self.expect('{'))
-            let closer =
-                match self.pos('}') {
-                    Some(i) => i,
-                    None => return self.err(format!(
-                        "Missing '\\}' for unclosed '\\{' at position {}",
-                        self.chari)),
-                };
-            if closer - self.chari + 1 == 0 {
-                return self.err("No Unicode class name found.")
-            }
-            name = self.slice(self.chari + 1, closer);
-            self.chari = closer;
-        } else {
-            if self.chari + 1 >= self.chars.len() {
-                return self.err("No single letter Unicode class name found.")
-            }
-            name = self.slice(self.chari + 1, self.chari + 2);
-            self.chari += 1;
-        }
-        match find_class(UNICODE_CLASSES, name.as_slice()) {
-            None => return self.err(format!(
-                "Could not find Unicode class '{}'", name)),
-            Some(ranges) => {
-                Ok(Class(ranges, negated | (self.flags & FLAG_NOCASE)))
-            }
-        }
-    }
-
-    // Parses an octal number, up to 3 digits.
-    // Assumes that \n has been read, where n is the first digit.
-    fn parse_octal(&mut self) -> Result<Ast, Error> {
-        let start = self.chari;
-        let mut end = start + 1;
-        let (d2, d3) = (self.peek(1), self.peek(2));
-        if d2 >= Some('0') && d2 <= Some('7') {
-            try!(self.noteof("expected octal character in [0-7]"))
-            end += 1;
-            if d3 >= Some('0') && d3 <= Some('7') {
-                try!(self.noteof("expected octal character in [0-7]"))
-                end += 1;
-            }
-        }
-        let s = self.slice(start, end);
-        match num::from_str_radix::<u32>(s.as_slice(), 8) {
-            Some(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)),
-            None => self.err(format!(
-                "Could not parse '{}' as octal number.", s)),
-        }
-    }
-
-    // Parse a hex number. Either exactly two digits or anything in {}.
-    // Assumes that \x has been read.
-    fn parse_hex(&mut self) -> Result<Ast, Error> {
-        if !self.peek_is(1, '{') {
-            try!(self.expect('{'))
-            return self.parse_hex_two()
-        }
-        let start = self.chari + 2;
-        let closer =
-            match self.pos('}') {
-                None => return self.err(format!(
-                    "Missing '\\}' for unclosed '\\{' at position {}", start)),
-                Some(i) => i,
-            };
-        self.chari = closer;
-        self.parse_hex_digits(self.slice(start, closer).as_slice())
-    }
-
-    // Parses a two-digit hex number.
-    // Assumes that \xn has been read, where n is the first digit and is the
-    // current character.
-    // After return, parser will point at the second digit.
-    fn parse_hex_two(&mut self) -> Result<Ast, Error> {
-        let (start, end) = (self.chari, self.chari + 2);
-        let bad = self.slice(start - 2, self.chars.len());
-        try!(self.noteof(format!("Invalid hex escape sequence '{}'", bad)))
-        self.parse_hex_digits(self.slice(start, end).as_slice())
-    }
-
-    // Parses `s` as a hexadecimal number.
-    fn parse_hex_digits(&self, s: &str) -> Result<Ast, Error> {
-        match num::from_str_radix::<u32>(s, 16) {
-            Some(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)),
-            None => self.err(format!(
-                "Could not parse '{}' as hex number.", s)),
-        }
-    }
-
-    // Parses a named capture.
-    // Assumes that '(?P<' has been consumed and that the current character
-    // is '<'.
-    // When done, parser will be at the closing '>' character.
-    fn parse_named_capture(&mut self) -> Result<(), Error> {
-        try!(self.noteof("a capture name"))
-        let closer =
-            match self.pos('>') {
-                Some(i) => i,
-                None => return self.err("Capture name must end with '>'."),
-            };
-        if closer - self.chari == 0 {
-            return self.err("Capture names must have at least 1 character.")
-        }
-        let name = self.slice(self.chari, closer);
-        if !name.as_slice().chars().all(is_valid_cap) {
-            return self.err(
-                "Capture names can only have underscores, letters and digits.")
-        }
-        if self.names.contains(&name) {
-            return self.err(format!("Duplicate capture group name '{}'.", name))
-        }
-        self.names.push(name.clone());
-        self.chari = closer;
-        self.caps += 1;
-        self.stack.push(Paren(self.flags, self.caps, name));
-        Ok(())
-    }
-
-    // Parses non-capture groups and options.
-    // Assumes that '(?' has already been consumed and '?' is the current
-    // character.
-    fn parse_group_opts(&mut self) -> Result<(), Error> {
-        if self.peek_is(1, 'P') && self.peek_is(2, '<') {
-            try!(self.expect('P')) try!(self.expect('<'))
-            return self.parse_named_capture()
-        }
-        let start = self.chari;
-        let mut flags = self.flags;
-        let mut sign = 1;
-        let mut saw_flag = false;
-        loop {
-            try!(self.noteof("expected non-empty set of flags or closing ')'"))
-            match self.cur() {
-                'i' => { flags = flags | FLAG_NOCASE;     saw_flag = true},
-                'm' => { flags = flags | FLAG_MULTI;      saw_flag = true},
-                's' => { flags = flags | FLAG_DOTNL;      saw_flag = true},
-                'U' => { flags = flags | FLAG_SWAP_GREED; saw_flag = true},
-                '-' => {
-                    if sign < 0 {
-                        return self.err(format!(
-                            "Cannot negate flags twice in '{}'.",
-                            self.slice(start, self.chari + 1)))
-                    }
-                    sign = -1;
-                    saw_flag = false;
-                    flags = flags ^ flags;
-                }
-                ':' | ')' => {
-                    if sign < 0 {
-                        if !saw_flag {
-                            return self.err(format!(
-                                "A valid flag does not follow negation in '{}'",
-                                self.slice(start, self.chari + 1)))
-                        }
-                        flags = flags ^ flags;
-                    }
-                    if self.cur() == ':' {
-                        // Save the old flags with the opening paren.
-                        self.stack.push(Paren(self.flags, 0, "".to_strbuf()));
-                    }
-                    self.flags = flags;
-                    return Ok(())
-                }
-                _ => return self.err(format!(
-                    "Unrecognized flag '{}'.", self.cur())),
-            }
-        }
-    }
-
-    // Peeks at the next character and returns whether it's ungreedy or not.
-    // If it is, then the next character is consumed.
-    fn get_next_greedy(&mut self) -> Result<Greed, Error> {
-        Ok(if self.peek_is(1, '?') {
-            try!(self.expect('?'))
-            Ungreedy
-        } else {
-            Greedy
-        }.swap(self.flags & FLAG_SWAP_GREED > 0))
-    }
-
-    // Searches the stack (starting at the top) until it finds an expression
-    // for which `pred` returns true. The index of that expression in the
-    // stack is returned.
-    // If there's no match, then one of two things happens depending on the
-    // values of `allow_start`. When it's true, then `0` will be returned.
-    // Otherwise, an error will be returned.
-    // Generally, `allow_start` is only true when you're *not* expecting an
-    // opening parenthesis.
-    fn pos_last(&self, allow_start: bool, pred: |&BuildAst| -> bool)
-               -> Result<uint, Error> {
-        let from = match self.stack.iter().rev().position(pred) {
-            Some(i) => i,
-            None => {
-                if allow_start {
-                    self.stack.len()
-                } else {
-                    return self.err("No matching opening parenthesis.")
-                }
-            }
-        };
-        // Adjust index since 'from' is for the reversed stack.
-        // Also, don't include the '(' or '|'.
-        Ok(self.stack.len() - from)
-    }
-
-    // concat starts at `from` in the parser's stack and concatenates all
-    // expressions up to the top of the stack. The resulting concatenation is
-    // then pushed on to the stack.
-    // Usually `from` corresponds to the position of an opening parenthesis,
-    // a '|' (alternation) or the start of the entire expression.
-    fn concat(&mut self, from: uint) -> Result<(), Error> {
-        let ast = try!(self.build_from(from, concat_flatten));
-        self.push(ast);
-        Ok(())
-    }
-
-    // concat starts at `from` in the parser's stack and alternates all
-    // expressions up to the top of the stack. The resulting alternation is
-    // then pushed on to the stack.
-    // Usually `from` corresponds to the position of an opening parenthesis
-    // or the start of the entire expression.
-    // This will also drop any opening parens or alternation bars found in
-    // the intermediate AST.
-    fn alternate(&mut self, mut from: uint) -> Result<(), Error> {
-        // Unlike in the concatenation case, we want 'build_from' to continue
-        // all the way to the opening left paren (so it will be popped off and
-        // thrown away). But be careful with overflow---we can't count on the
-        // open paren to be there.
-        if from > 0 { from = from - 1}
-        let ast = try!(self.build_from(from, |l,r| Alt(box l, box r)));
-        self.push(ast);
-        Ok(())
-    }
-
-    // build_from combines all AST elements starting at 'from' in the
-    // parser's stack using 'mk' to combine them. If any such element is not an
-    // AST then it is popped off the stack and ignored.
-    fn build_from(&mut self, from: uint, mk: |Ast, Ast| -> Ast)
-                 -> Result<Ast, Error> {
-        if from >= self.stack.len() {
-            return self.err("Empty group or alternate not allowed.")
-        }
-
-        let mut combined = try!(self.pop_ast());
-        let mut i = self.stack.len();
-        while i > from {
-            i = i - 1;
-            match self.stack.pop().unwrap() {
-                Ast(x) => combined = mk(x, combined),
-                _ => {},
-            }
-        }
-        Ok(combined)
-    }
-
-    fn parse_uint(&self, s: &str) -> Result<uint, Error> {
-        match from_str::<uint>(s) {
-            Some(i) => Ok(i),
-            None => self.err(format!(
-                "Expected an unsigned integer but got '{}'.", s)),
-        }
-    }
-
-    fn char_from_u32(&self, n: u32) -> Result<char, Error> {
-        match char::from_u32(n) {
-            Some(c) => Ok(c),
-            None => self.err(format!(
-                "Could not decode '{}' to unicode character.", n)),
-        }
-    }
-
-    fn pos(&self, c: char) -> Option<uint> {
-        self.chars.iter()
-            .skip(self.chari).position(|&c2| c2 == c).map(|i| self.chari + i)
-    }
-
-    fn err<T>(&self, msg: &str) -> Result<T, Error> {
-        Err(Error {
-            pos: self.chari,
-            msg: msg.to_strbuf(),
-        })
-    }
-
-    fn peek(&self, offset: uint) -> Option<char> {
-        if self.chari + offset >= self.chars.len() {
-            return None
-        }
-        Some(*self.chars.get(self.chari + offset))
-    }
-
-    fn peek_is(&self, offset: uint, is: char) -> bool {
-        self.peek(offset) == Some(is)
-    }
-
-    fn cur(&self) -> char {
-        *self.chars.get(self.chari)
-    }
-
-    fn slice(&self, start: uint, end: uint) -> StrBuf {
-        str::from_chars(self.chars.as_slice().slice(start, end)).to_strbuf()
-    }
-}
-
-// Given an unordered collection of character ranges, combine_ranges returns
-// an ordered sequence of character ranges where no two ranges overlap. They
-// are ordered from least to greatest (using start position).
-fn combine_ranges(unordered: Vec<(char, char)>) -> Vec<(char, char)> {
-    // Returns true iff the two character classes overlap or share a boundary.
-    // e.g., ('a', 'g') and ('h', 'm') would return true.
-    fn should_merge((a, b): (char, char), (x, y): (char, char)) -> bool {
-        cmp::max(a, x) as u32 <= cmp::min(b, y) as u32 + 1
-    }
-
-    // This is currently O(n^2), but I think with sufficient cleverness,
-    // it can be reduced to O(n) **if necessary**.
-    let mut ordered: Vec<(char, char)> = Vec::with_capacity(unordered.len());
-    for (us, ue) in unordered.move_iter() {
-        let (mut us, mut ue) = (us, ue);
-        assert!(us <= ue);
-        let mut which: Option<uint> = None;
-        for (i, &(os, oe)) in ordered.iter().enumerate() {
-            if should_merge((us, ue), (os, oe)) {
-                us = cmp::min(us, os);
-                ue = cmp::max(ue, oe);
-                which = Some(i);
-                break
-            }
-        }
-        match which {
-            None => ordered.push((us, ue)),
-            Some(i) => *ordered.get_mut(i) = (us, ue),
-        }
-    }
-    ordered.sort();
-    ordered
-}
-
-// Constructs a Unicode friendly Perl character class from \d, \s or \w
-// (or any of their negated forms). Note that this does not handle negation.
-fn perl_unicode_class(which: char) -> Vec<(char, char)> {
-    match which.to_lowercase() {
-        'd' => Vec::from_slice(PERLD),
-        's' => Vec::from_slice(PERLS),
-        'w' => Vec::from_slice(PERLW),
-        _ => unreachable!(),
-    }
-}
-
-// Returns a concatenation of two expressions. This also guarantees that a
-// `Cat` expression will never be a direct child of another `Cat` expression.
-fn concat_flatten(x: Ast, y: Ast) -> Ast {
-    match (x, y) {
-        (Cat(mut xs), Cat(ys)) => { xs.push_all_move(ys); Cat(xs) }
-        (Cat(mut xs), ast) => { xs.push(ast); Cat(xs) }
-        (ast, Cat(mut xs)) => { xs.unshift(ast); Cat(xs) }
-        (ast1, ast2) => Cat(vec!(ast1, ast2)),
-    }
-}
-
-pub fn is_punct(c: char) -> bool {
-    match c {
-        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' |
-        '[' | ']' | '{' | '}' | '^' | '$' => true,
-        _ => false,
-    }
-}
-
-fn is_valid_cap(c: char) -> bool {
-    c == '_' || (c >= '0' && c <= '9')
-    || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-}
-
-fn find_class(classes: NamedClasses, name: &str) -> Option<Vec<(char, char)>> {
-    match classes.bsearch(|&(s, _)| s.cmp(&name)) {
-        Some(i) => Some(Vec::from_slice(classes[i].val1())),
-        None => None,
-    }
-}
-
-type Class = &'static [(char, char)];
-type NamedClasses = &'static [(&'static str, Class)];
-
-static ASCII_CLASSES: NamedClasses = &[
-    // Classes must be in alphabetical order so that bsearch works.
-    // [:alnum:]      alphanumeric (== [0-9A-Za-z])
-    // [:alpha:]      alphabetic (== [A-Za-z])
-    // [:ascii:]      ASCII (== [\x00-\x7F])
-    // [:blank:]      blank (== [\t ])
-    // [:cntrl:]      control (== [\x00-\x1F\x7F])
-    // [:digit:]      digits (== [0-9])
-    // [:graph:]      graphical (== [!-~])
-    // [:lower:]      lower case (== [a-z])
-    // [:print:]      printable (== [ -~] == [ [:graph:]])
-    // [:punct:]      punctuation (== [!-/:-@[-`{-~])
-    // [:space:]      whitespace (== [\t\n\v\f\r ])
-    // [:upper:]      upper case (== [A-Z])
-    // [:word:]       word characters (== [0-9A-Za-z_])
-    // [:xdigit:]     hex digit (== [0-9A-Fa-f])
-    // Taken from: http://golang.org/pkg/regex/syntax/
-    ("alnum", &[('0', '9'), ('A', 'Z'), ('a', 'z')]),
-    ("alpha", &[('A', 'Z'), ('a', 'z')]),
-    ("ascii", &[('\x00', '\x7F')]),
-    ("blank", &[(' ', ' '), ('\t', '\t')]),
-    ("cntrl", &[('\x00', '\x1F'), ('\x7F', '\x7F')]),
-    ("digit", &[('0', '9')]),
-    ("graph", &[('!', '~')]),
-    ("lower", &[('a', 'z')]),
-    ("print", &[(' ', '~')]),
-    ("punct", &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')]),
-    ("space", &[('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'), ('\x0C', '\x0C'),
-                ('\r', '\r'), (' ', ' ')]),
-    ("upper", &[('A', 'Z')]),
-    ("word", &[('0', '9'), ('A', 'Z'), ('a', 'z'), ('_', '_')]),
-    ("xdigit", &[('0', '9'), ('A', 'F'), ('a', 'f')]),
-];
author	Alex Crichton <alex@alexcrichton.com>	2014-05-16 14:23:04 -0700
committer	Alex Crichton <alex@alexcrichton.com>	2014-05-17 01:01:47 -0700
commit	4e9e091e91ea2ad8a6f45a9b20ff331d4bca7a23 (patch)
tree	c01cb3c61732225abd28d5eb8991537ca7ad30b5 /src/libregex/parse.rs
parent	25c54226c3e7dd6f59cf2e92238a4d79d8b0128d (diff)
download	rust-4e9e091e91ea2ad8a6f45a9b20ff331d4bca7a23.tar.gz rust-4e9e091e91ea2ad8a6f45a9b20ff331d4bca7a23.zip