about summary refs log tree commit diff
path: root/src/libregex/compile.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/libregex/compile.rs')
-rw-r--r--src/libregex/compile.rs275
1 files changed, 0 insertions, 275 deletions
diff --git a/src/libregex/compile.rs b/src/libregex/compile.rs
deleted file mode 100644
index d29a7a425c1..00000000000
--- a/src/libregex/compile.rs
+++ /dev/null
@@ -1,275 +0,0 @@
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-// Enable this to squash warnings due to exporting pieces of the representation
-// for use with the regex! macro. See lib.rs for explanation.
-
-pub use self::Inst::*;
-
-use std::cmp;
-use std::iter::repeat;
-use parse;
-use parse::{
-    Flags, FLAG_EMPTY,
-    Nothing, Literal, Dot, AstClass, Begin, End, WordBoundary, Capture, Cat, Alt,
-    Rep,
-    ZeroOne, ZeroMore, OneMore,
-};
-
-type InstIdx = uint;
-
-#[derive(Show, Clone)]
-pub enum Inst {
-    // When a Match instruction is executed, the current thread is successful.
-    Match,
-
-    // The OneChar instruction matches a literal character.
-    // The flags indicate whether to do a case insensitive match.
-    OneChar(char, Flags),
-
-    // The CharClass instruction tries to match one input character against
-    // the range of characters given.
-    // The flags indicate whether to do a case insensitive match and whether
-    // the character class is negated or not.
-    CharClass(Vec<(char, char)>, Flags),
-
-    // Matches any character except new lines.
-    // The flags indicate whether to include the '\n' character.
-    Any(Flags),
-
-    // Matches the beginning of the string, consumes no characters.
-    // The flags indicate whether it matches if the preceding character
-    // is a new line.
-    EmptyBegin(Flags),
-
-    // Matches the end of the string, consumes no characters.
-    // The flags indicate whether it matches if the proceeding character
-    // is a new line.
-    EmptyEnd(Flags),
-
-    // Matches a word boundary (\w on one side and \W \A or \z on the other),
-    // and consumes no character.
-    // The flags indicate whether this matches a word boundary or something
-    // that isn't a word boundary.
-    EmptyWordBoundary(Flags),
-
-    // Saves the current position in the input string to the Nth save slot.
-    Save(uint),
-
-    // Jumps to the instruction at the index given.
-    Jump(InstIdx),
-
-    // Jumps to the instruction at the first index given. If that leads to
-    // a panic state, then the instruction at the second index given is
-    // tried.
-    Split(InstIdx, InstIdx),
-}
-
-/// Program represents a compiled regular expression. Once an expression is
-/// compiled, its representation is immutable and will never change.
-///
-/// All of the data in a compiled expression is wrapped in "MaybeStatic" or
-/// "MaybeOwned" types so that a `Program` can be represented as static data.
-/// (This makes it convenient and efficient for use with the `regex!` macro.)
-#[derive(Clone)]
-pub struct Program {
-    /// A sequence of instructions.
-    pub insts: Vec<Inst>,
-    /// If the regular expression requires a literal prefix in order to have a
-    /// match, that prefix is stored here. (It's used in the VM to implement
-    /// an optimization.)
-    pub prefix: String,
-}
-
-impl Program {
-    /// Compiles a Regex given its AST.
-    pub fn new(ast: parse::Ast) -> (Program, Vec<Option<String>>) {
-        let mut c = Compiler {
-            insts: Vec::with_capacity(100),
-            names: Vec::with_capacity(10),
-        };
-
-        c.insts.push(Save(0));
-        c.compile(ast);
-        c.insts.push(Save(1));
-        c.insts.push(Match);
-
-        // Try to discover a literal string prefix.
-        // This is a bit hacky since we have to skip over the initial
-        // 'Save' instruction.
-        let mut pre = String::with_capacity(5);
-        for inst in c.insts[1..].iter() {
-            match *inst {
-                OneChar(c, FLAG_EMPTY) => pre.push(c),
-                _ => break
-            }
-        }
-
-        let Compiler { insts, names } = c;
-        let prog = Program {
-            insts: insts,
-            prefix: pre,
-        };
-        (prog, names)
-    }
-
-    /// Returns the total number of capture groups in the regular expression.
-    /// This includes the zeroth capture.
-    pub fn num_captures(&self) -> uint {
-        let mut n = 0;
-        for inst in self.insts.iter() {
-            match *inst {
-                Save(c) => n = cmp::max(n, c+1),
-                _ => {}
-            }
-        }
-        // There's exactly 2 Save slots for every capture.
-        n / 2
-    }
-}
-
-struct Compiler<'r> {
-    insts: Vec<Inst>,
-    names: Vec<Option<String>>,
-}
-
-// The compiler implemented here is extremely simple. Most of the complexity
-// in this crate is in the parser or the VM.
-// The only tricky thing here is patching jump/split instructions to point to
-// the right instruction.
-impl<'r> Compiler<'r> {
-    fn compile(&mut self, ast: parse::Ast) {
-        match ast {
-            Nothing => {},
-            Literal(c, flags) => self.push(OneChar(c, flags)),
-            Dot(nl) => self.push(Any(nl)),
-            AstClass(ranges, flags) =>
-                self.push(CharClass(ranges, flags)),
-            Begin(flags) => self.push(EmptyBegin(flags)),
-            End(flags) => self.push(EmptyEnd(flags)),
-            WordBoundary(flags) => self.push(EmptyWordBoundary(flags)),
-            Capture(cap, name, x) => {
-                let len = self.names.len();
-                if cap >= len {
-                    self.names.extend(repeat(None).take(10 + cap - len))
-                }
-                self.names[cap] = name;
-
-                self.push(Save(2 * cap));
-                self.compile(*x);
-                self.push(Save(2 * cap + 1));
-            }
-            Cat(xs) => {
-                for x in xs.into_iter() {
-                    self.compile(x)
-                }
-            }
-            Alt(x, y) => {
-                let split = self.empty_split(); // push: split 0, 0
-                let j1 = self.insts.len();
-                self.compile(*x);                // push: insts for x
-                let jmp = self.empty_jump();    // push: jmp 0
-                let j2 = self.insts.len();
-                self.compile(*y);                // push: insts for y
-                let j3 = self.insts.len();
-
-                self.set_split(split, j1, j2);  // split 0, 0 -> split j1, j2
-                self.set_jump(jmp, j3);         // jmp 0      -> jmp j3
-            }
-            Rep(x, ZeroOne, g) => {
-                let split = self.empty_split();
-                let j1 = self.insts.len();
-                self.compile(*x);
-                let j2 = self.insts.len();
-
-                if g.is_greedy() {
-                    self.set_split(split, j1, j2);
-                } else {
-                    self.set_split(split, j2, j1);
-                }
-            }
-            Rep(x, ZeroMore, g) => {
-                let j1 = self.insts.len();
-                let split = self.empty_split();
-                let j2 = self.insts.len();
-                self.compile(*x);
-                let jmp = self.empty_jump();
-                let j3 = self.insts.len();
-
-                self.set_jump(jmp, j1);
-                if g.is_greedy() {
-                    self.set_split(split, j2, j3);
-                } else {
-                    self.set_split(split, j3, j2);
-                }
-            }
-            Rep(x, OneMore, g) => {
-                let j1 = self.insts.len();
-                self.compile(*x);
-                let split = self.empty_split();
-                let j2 = self.insts.len();
-
-                if g.is_greedy() {
-                    self.set_split(split, j1, j2);
-                } else {
-                    self.set_split(split, j2, j1);
-                }
-            }
-        }
-    }
-
-    /// Appends the given instruction to the program.
-    #[inline]
-    fn push(&mut self, x: Inst) {
-        self.insts.push(x)
-    }
-
-    /// Appends an *empty* `Split` instruction to the program and returns
-    /// the index of that instruction. (The index can then be used to "patch"
-    /// the actual locations of the split in later.)
-    #[inline]
-    fn empty_split(&mut self) -> InstIdx {
-        self.insts.push(Split(0, 0));
-        self.insts.len() - 1
-    }
-
-    /// Sets the left and right locations of a `Split` instruction at index
-    /// `i` to `pc1` and `pc2`, respectively.
-    /// If the instruction at index `i` isn't a `Split` instruction, then
-    /// `panic!` is called.
-    #[inline]
-    fn set_split(&mut self, i: InstIdx, pc1: InstIdx, pc2: InstIdx) {
-        let split = &mut self.insts[i];
-        match *split {
-            Split(_, _) => *split = Split(pc1, pc2),
-            _ => panic!("BUG: Invalid split index."),
-        }
-    }
-
-    /// Appends an *empty* `Jump` instruction to the program and returns the
-    /// index of that instruction.
-    #[inline]
-    fn empty_jump(&mut self) -> InstIdx {
-        self.insts.push(Jump(0));
-        self.insts.len() - 1
-    }
-
-    /// Sets the location of a `Jump` instruction at index `i` to `pc`.
-    /// If the instruction at index `i` isn't a `Jump` instruction, then
-    /// `panic!` is called.
-    #[inline]
-    fn set_jump(&mut self, i: InstIdx, pc: InstIdx) {
-        let jmp = &mut self.insts[i];
-        match *jmp {
-            Jump(_) => *jmp = Jump(pc),
-            _ => panic!("BUG: Invalid jump index."),
-        }
-    }
-}