// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
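
// Helpers for testing the parser: map source strings to token trees,
// parsers, and AST fragments; `matches_codepattern` provides a
// whitespace-tolerant string comparison for use in tests.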
use ast;
use parse::{new_parse_sess};
use parse::{ParseSess,string_to_filemap,filemap_to_tts};
use parse::{new_parser_from_source_str};
use parse::parser::Parser;
use parse::token;
// map a string to tts, using a made-up filename: return both the TokenTrees
// and the ParseSess
pub fn string_to_tts_and_sess (source_str : ~str) -> (~[ast::TokenTree], @ParseSess) {
    let ps = new_parse_sess();
    (filemap_to_tts(ps,string_to_filemap(ps,source_str,~"bogofile")),ps)
}
// map a string to tts, using a made-up filename:
pub fn string_to_tts(source_str : ~str) -> ~[ast::TokenTree] {
    let (tts,_) = string_to_tts_and_sess(source_str);
    tts
}
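// map a string to a parser, returning the ParseSess it was built with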
pub fn string_to_parser_and_sess(source_str: ~str) -> (Parser,@ParseSess) {
    let ps = new_parse_sess();
    (new_parser_from_source_str(ps,~[],~"bogofile",source_str),ps)
}
// map string to parser (via tts)
pub fn string_to_parser(source_str: ~str) -> Parser {
    let (p,_) = string_to_parser_and_sess(source_str);
    p
}
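// run the given parse function on a fresh parser for the string, then abort
// the session if any parse errors were reported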
fn with_error_checking_parse<T>(s: ~str, f: |&mut Parser| -> T) -> T {
    let mut p = string_to_parser(s);
    let x = f(&mut p);
    p.abort_if_errors();
    x
}
// parse a string, return a crate.
pub fn string_to_crate (source_str : ~str) -> ast::Crate {
    with_error_checking_parse(source_str, |p| {
        p.parse_crate_mod()
    })
}
// parse a string, return a crate and the ParseSess
pub fn string_to_crate_and_sess (source_str : ~str) -> (ast::Crate,@ParseSess) {
    let (mut p,ps) = string_to_parser_and_sess(source_str);
    (p.parse_crate_mod(),ps)
}
// parse a string, return an expr
pub fn string_to_expr (source_str : ~str) -> @ast::Expr {
    with_error_checking_parse(source_str, |p| {
        p.parse_expr()
    })
}
// parse a string, return an item
pub fn string_to_item (source_str : ~str) -> Option<@ast::Item> {
    with_error_checking_parse(source_str, |p| {
        p.parse_item(~[])
    })
}
// parse a string, return a stmt
pub fn string_to_stmt(source_str : ~str) -> @ast::Stmt {
    with_error_checking_parse(source_str, |p| {
        p.parse_stmt(~[])
    })
}
// parse a string, return a pat. Uses "irrefutable"... which doesn't
// (currently) affect parsing.
pub fn string_to_pat(source_str : ~str) -> @ast::Pat {
    string_to_parser(source_str).parse_pat()
}
// convert a vector of strings to a vector of ast::Ident's
pub fn strs_to_idents(ids: ~[&str]) -> ~[ast::Ident] {
    ids.map(|u| token::str_to_ident(*u))
}
// does the given string match the pattern? whitespace in the first string
// may be deleted or replaced with other whitespace to match the pattern.
// this function is unicode-ignorant; fortunately, the careful design of
// UTF-8 mitigates this ignorance. In particular, this function only collapses
// sequences of \n, \r, ' ', and \t, but it should otherwise tolerate unicode
// chars. Unsurprisingly, it doesn't do NFKC normalization.
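// For example (mirroring the tests below):
//   matches_codepattern("a b", "a \n\t\r b")  => true  (ws runs in both collapse)
//   matches_codepattern("a b", "ab")          => true  (ws in the given string may be dropped)
//   matches_codepattern("ab", "a b")          => false (ws in the pattern is never ignored)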
pub fn matches_codepattern(a : &str, b : &str) -> bool {
    let mut idx_a = 0;
    let mut idx_b = 0;
    loop {
        if idx_a == a.len() && idx_b == b.len() {
            return true;
        }
        else if idx_a == a.len() {return false;}
        else if idx_b == b.len() {
            // maybe the stuff left in a is all ws?
            if is_whitespace(a.char_at(idx_a)) {
                return scan_for_non_ws_or_end(a,idx_a) == a.len();
            } else {
                return false;
            }
        }
        // ws in both given and pattern:
        else if is_whitespace(a.char_at(idx_a))
            && is_whitespace(b.char_at(idx_b)) {
            idx_a = scan_for_non_ws_or_end(a,idx_a);
            idx_b = scan_for_non_ws_or_end(b,idx_b);
        }
        // ws in given only:
        else if is_whitespace(a.char_at(idx_a)) {
            idx_a = scan_for_non_ws_or_end(a,idx_a);
        }
        // *don't* silently eat ws in expected only.
        else if a.char_at(idx_a) == b.char_at(idx_b) {
            idx_a += 1;
            idx_b += 1;
        }
        else {
            return false;
        }
    }
}
// given a string and an index, return the first uint >= idx
// that is a non-ws-char or is outside of the legal range of
// the string.
fn scan_for_non_ws_or_end(a : &str, idx: uint) -> uint {
    let mut i = idx;
    let len = a.len();
    while (i < len) && (is_whitespace(a.char_at(i))) {
        i += 1;
    }
    i
}
// copied from lexer.
pub fn is_whitespace(c: char) -> bool {
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
#[cfg(test)]
mod test {
    use super::*;
    #[test] fn eqmodws() {
        assert_eq!(matches_codepattern("",""),true);
        assert_eq!(matches_codepattern("","a"),false);
        assert_eq!(matches_codepattern("a",""),false);
        assert_eq!(matches_codepattern("a","a"),true);
        assert_eq!(matches_codepattern("a b","a \n\t\r b"),true);
        assert_eq!(matches_codepattern("a b ","a \n\t\r b"),true);
        assert_eq!(matches_codepattern("a b","a \n\t\r b "),false);
        assert_eq!(matches_codepattern("a b","a b"),true);
        assert_eq!(matches_codepattern("ab","a b"),false);
        assert_eq!(matches_codepattern("a b","ab"),true);
    }
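    // a supplementary check (a small sketch, not exhaustive) for the ws
    // scanner: it should skip a whitespace run starting at the given index
    // and stop at the first non-ws char, or at the end of the string.
    #[test] fn scan_ws() {
        assert_eq!(scan_for_non_ws_or_end(" \t\r\nx", 0), 4);
        assert_eq!(scan_for_non_ws_or_end("abc", 1), 1);
        assert_eq!(scan_for_non_ws_or_end("a  ", 1), 3);
    }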
}