| author | Aleksey Kladov <aleksey.kladov@gmail.com> | 2020-08-29 12:10:16 +0200 |
|---|---|---|
| committer | Aleksey Kladov <aleksey.kladov@gmail.com> | 2020-08-30 19:53:36 +0200 |
| commit | ccffea5b6b3372cefd4e15bc738a2669bc6f69a0 (patch) | |
| tree | 161f0e5bad86487777bf89507534de16d17cb157 | |
| parent | 85fbf49ce0e2274d0acf798f6e703747674feec3 (diff) | |
| download | rust-ccffea5b6b3372cefd4e15bc738a2669bc6f69a0.tar.gz rust-ccffea5b6b3372cefd4e15bc738a2669bc6f69a0.zip | |
Move lexer unit tests to rustc_lexer
StringReader is an internal abstraction which changes a lot at the moment, so these unit tests cause quite a bit of friction. Moving them to rustc_lexer and switching to a more integration-testing style should make them much less annoying, hopefully without decreasing their usefulness much. Note that the doublecolon tests are removed (it is unclear what they were testing). The \r\n tests are removed as well, since we normalize line endings even before lexing.
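For context, the new tests in rustc_lexer are snapshot tests built on the `expect_test` crate and a small `check_lexing` helper. Only the helper's signature appears in this diff, so the following is a minimal sketch under the assumption that it simply runs `rustc_lexer::tokenize` and compares the `Debug`-formatted tokens against an inline snapshot; the test name `fn_main_smoke` and the helper body are illustrative, not part of this change.

```rust
// Hedged sketch, not the actual helper from this commit: `check_lexing` is
// assumed to lex the input with `rustc_lexer::tokenize` and compare the
// Debug-formatted tokens against an inline `expect_test` snapshot.
use expect_test::{expect, Expect};

fn check_lexing(src: &str, expect: Expect) {
    // One `Token { kind: ..., len: ... }` line per token, matching the
    // snapshot layout used by the tests in this diff.
    let actual: String =
        rustc_lexer::tokenize(src).map(|token| format!("{:?}\n", token)).collect();
    expect.assert_eq(&actual)
}

#[test]
fn fn_main_smoke() {
    // Illustrative usage; run with UPDATE_EXPECT=1 to regenerate the snapshot.
    check_lexing(
        "fn main() {}",
        expect![[r#"
            Token { kind: Ident, len: 2 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 4 }
            Token { kind: OpenParen, len: 1 }
            Token { kind: CloseParen, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: OpenBrace, len: 1 }
            Token { kind: CloseBrace, len: 1 }
        "#]],
    )
}
```

The snapshot style keeps the expected token stream next to the input and can be regenerated mechanically, which is what makes these tests cheaper to maintain than the StringReader unit tests they replace.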
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Cargo.lock | 15 |
| -rw-r--r-- | compiler/rustc_expand/src/lib.rs | 5 |
| -rw-r--r-- | compiler/rustc_expand/src/parse/lexer/tests.rs | 252 |
| -rw-r--r-- | compiler/rustc_lexer/src/tests.rs | 160 |

4 files changed, 154 insertions(+), 278 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index ffc1f0dec1d..d5493969433 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -342,7 +342,7 @@ dependencies = [
 name = "cargo-miri"
 version = "0.1.0"
 dependencies = [
- "cargo_metadata 0.11.1",
+ "cargo_metadata 0.9.1",
 "directories",
 "rustc-workspace-hack",
 "rustc_version",
@@ -393,6 +393,18 @@ dependencies = [
 
 [[package]]
 name = "cargo_metadata"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46e3374c604fb39d1a2f35ed5e4a4e30e60d01fab49446e08f1b3e9a90aef202"
+dependencies = [
+ "semver 0.9.0",
+ "serde",
+ "serde_derive",
+ "serde_json",
+]
+
+[[package]]
+name = "cargo_metadata"
 version = "0.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "89fec17b16f1ac67908af82e47d0a90a7afd0e1827b181cd77504323d3263d35"
@@ -1979,6 +1991,7 @@ dependencies = [
 name = "miri"
 version = "0.1.0"
 dependencies = [
+ "byteorder",
 "colored",
 "compiletest_rs",
 "env_logger 0.7.1",
diff --git a/compiler/rustc_expand/src/lib.rs b/compiler/rustc_expand/src/lib.rs
index 7f631cb71af..5436b1ef737 100644
--- a/compiler/rustc_expand/src/lib.rs
+++ b/compiler/rustc_expand/src/lib.rs
@@ -39,11 +39,6 @@ mod tests;
 mod parse {
     #[cfg(test)]
     mod tests;
-    #[cfg(test)]
-    mod lexer {
-        #[cfg(test)]
-        mod tests;
-    }
 }
 #[cfg(test)]
 mod tokenstream {
diff --git a/compiler/rustc_expand/src/parse/lexer/tests.rs b/compiler/rustc_expand/src/parse/lexer/tests.rs
deleted file mode 100644
index 87184444283..00000000000
--- a/compiler/rustc_expand/src/parse/lexer/tests.rs
+++ /dev/null
@@ -1,252 +0,0 @@
-use rustc_ast::ast::AttrStyle;
-use rustc_ast::token::{self, CommentKind, Token, TokenKind};
-use rustc_data_structures::sync::Lrc;
-use rustc_errors::{emitter::EmitterWriter, Handler};
-use rustc_parse::lexer::StringReader;
-use rustc_session::parse::ParseSess;
-use rustc_span::source_map::{FilePathMapping, SourceMap};
-use rustc_span::symbol::Symbol;
-use rustc_span::with_default_session_globals;
-use rustc_span::{BytePos, Span};
-
-use std::io;
-use std::path::PathBuf;
-
-fn mk_sess(sm: Lrc<SourceMap>) -> ParseSess {
-    let emitter = EmitterWriter::new(
-        Box::new(io::sink()),
-        Some(sm.clone()),
-        false,
-        false,
-        false,
-        None,
-        false,
-    );
-    ParseSess::with_span_handler(Handler::with_emitter(true, None, Box::new(emitter)), sm)
-}
-
-// Creates a string reader for the given string.
-fn setup<'a>(sm: &SourceMap, sess: &'a ParseSess, teststr: String) -> StringReader<'a> {
-    let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr);
-    StringReader::new(sess, sf, None)
-}
-
-#[test]
-fn t1() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        let mut string_reader = setup(
-            &sm,
-            &sh,
-            "/* my source file */ fn main() { println!(\"zebra\"); }\n".to_string(),
-        );
-        assert_eq!(string_reader.next_token(), token::Comment);
-        assert_eq!(string_reader.next_token(), token::Whitespace);
-        let tok1 = string_reader.next_token();
-        let tok2 = Token::new(mk_ident("fn"), Span::with_root_ctxt(BytePos(21), BytePos(23)));
-        assert_eq!(tok1.kind, tok2.kind);
-        assert_eq!(tok1.span, tok2.span);
-        assert_eq!(string_reader.next_token(), token::Whitespace);
-        // Read another token.
-        let tok3 = string_reader.next_token();
-        assert_eq!(string_reader.pos(), BytePos(28));
-        let tok4 = Token::new(mk_ident("main"), Span::with_root_ctxt(BytePos(24), BytePos(28)));
-        assert_eq!(tok3.kind, tok4.kind);
-        assert_eq!(tok3.span, tok4.span);
-
-        assert_eq!(string_reader.next_token(), token::OpenDelim(token::Paren));
-        assert_eq!(string_reader.pos(), BytePos(29))
-    })
-}
-
-// Checks that the given reader produces the desired stream
-// of tokens (stop checking after exhausting `expected`).
-fn check_tokenization(mut string_reader: StringReader<'_>, expected: Vec<TokenKind>) {
-    for expected_tok in &expected {
-        assert_eq!(&string_reader.next_token(), expected_tok);
-    }
-}
-
-// Makes the identifier by looking up the string in the interner.
-fn mk_ident(id: &str) -> TokenKind {
-    token::Ident(Symbol::intern(id), false)
-}
-
-fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind {
-    TokenKind::lit(kind, Symbol::intern(symbol), suffix.map(Symbol::intern))
-}
-
-#[test]
-fn doublecolon_parsing() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        check_tokenization(
-            setup(&sm, &sh, "a b".to_string()),
-            vec![mk_ident("a"), token::Whitespace, mk_ident("b")],
-        );
-    })
-}
-
-#[test]
-fn doublecolon_parsing_2() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        check_tokenization(
-            setup(&sm, &sh, "a::b".to_string()),
-            vec![mk_ident("a"), token::Colon, token::Colon, mk_ident("b")],
-        );
-    })
-}
-
-#[test]
-fn doublecolon_parsing_3() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        check_tokenization(
-            setup(&sm, &sh, "a ::b".to_string()),
-            vec![mk_ident("a"), token::Whitespace, token::Colon, token::Colon, mk_ident("b")],
-        );
-    })
-}
-
-#[test]
-fn doublecolon_parsing_4() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        check_tokenization(
-            setup(&sm, &sh, "a:: b".to_string()),
-            vec![mk_ident("a"), token::Colon, token::Colon, token::Whitespace, mk_ident("b")],
-        );
-    })
-}
-
-#[test]
-fn character_a() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(setup(&sm, &sh, "'a'".to_string()).next_token(), mk_lit(token::Char, "a", None),);
-    })
-}
-
-#[test]
-fn character_space() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(setup(&sm, &sh, "' '".to_string()).next_token(), mk_lit(token::Char, " ", None),);
-    })
-}
-
-#[test]
-fn character_escaped() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(
-            setup(&sm, &sh, "'\\n'".to_string()).next_token(),
-            mk_lit(token::Char, "\\n", None),
-        );
-    })
-}
-
-#[test]
-fn lifetime_name() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(
-            setup(&sm, &sh, "'abc".to_string()).next_token(),
-            token::Lifetime(Symbol::intern("'abc")),
-        );
-    })
-}
-
-#[test]
-fn raw_string() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(
-            setup(&sm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token(),
-            mk_lit(token::StrRaw(3), "\"#a\\b\x00c\"", None),
-        );
-    })
-}
-
-#[test]
-fn literal_suffixes() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        macro_rules! test {
-            ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
-                assert_eq!(
-                    setup(&sm, &sh, format!("{}suffix", $input)).next_token(),
-                    mk_lit(token::$tok_type, $tok_contents, Some("suffix")),
-                );
-                // with a whitespace separator
-                assert_eq!(
-                    setup(&sm, &sh, format!("{} suffix", $input)).next_token(),
-                    mk_lit(token::$tok_type, $tok_contents, None),
-                );
-            }};
-        }
-
-        test!("'a'", Char, "a");
-        test!("b'a'", Byte, "a");
-        test!("\"a\"", Str, "a");
-        test!("b\"a\"", ByteStr, "a");
-        test!("1234", Integer, "1234");
-        test!("0b101", Integer, "0b101");
-        test!("0xABC", Integer, "0xABC");
-        test!("1.0", Float, "1.0");
-        test!("1.0e10", Float, "1.0e10");
-
-        assert_eq!(
-            setup(&sm, &sh, "2us".to_string()).next_token(),
-            mk_lit(token::Integer, "2", Some("us")),
-        );
-        assert_eq!(
-            setup(&sm, &sh, "r###\"raw\"###suffix".to_string()).next_token(),
-            mk_lit(token::StrRaw(3), "raw", Some("suffix")),
-        );
-        assert_eq!(
-            setup(&sm, &sh, "br###\"raw\"###suffix".to_string()).next_token(),
-            mk_lit(token::ByteStrRaw(3), "raw", Some("suffix")),
-        );
-    })
-}
-
-#[test]
-fn nested_block_comments() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        let mut lexer = setup(&sm, &sh, "/* /* */ */'a'".to_string());
-        assert_eq!(lexer.next_token(), token::Comment);
-        assert_eq!(lexer.next_token(), mk_lit(token::Char, "a", None));
-    })
-}
-
-#[test]
-fn crlf_comments() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        let mut lexer = setup(&sm, &sh, "// test\r\n/// test\r\n".to_string());
-        let comment = lexer.next_token();
-        assert_eq!(comment.kind, token::Comment);
-        assert_eq!((comment.span.lo(), comment.span.hi()), (BytePos(0), BytePos(7)));
-        assert_eq!(lexer.next_token(), token::Whitespace);
-        assert_eq!(
-            lexer.next_token(),
-            token::DocComment(CommentKind::Line, AttrStyle::Outer, Symbol::intern(" test"))
-        );
-    })
-}
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs
index a1ea5ceb1f6..94017b7b286 100644
--- a/compiler/rustc_lexer/src/tests.rs
+++ b/compiler/rustc_lexer/src/tests.rs
@@ -129,6 +129,34 @@ fn check_lexing(src: &str, expect: Expect) {
 }
 
 #[test]
+fn smoke_test() {
+    check_lexing(
+        "/* my source file */ fn main() { println!(\"zebra\"); }\n",
+        expect![[r#"
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 20 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 2 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 4 }
+            Token { kind: OpenParen, len: 1 }
+            Token { kind: CloseParen, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: OpenBrace, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 7 }
+            Token { kind: Bang, len: 1 }
+            Token { kind: OpenParen, len: 1 }
+            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 7 }, len: 7 }
+            Token { kind: CloseParen, len: 1 }
+            Token { kind: Semi, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: CloseBrace, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
+    )
+}
+
+#[test]
 fn comment_flavors() {
     check_lexing(
         r"
@@ -143,25 +171,117 @@ fn comment_flavors() {
 /*! inner doc block */
 ",
         expect![[r#"
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: LineComment { doc_style: None }, len: 7 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: LineComment { doc_style: None }, len: 17 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: LineComment { doc_style: Some(Outer) }, len: 18 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: LineComment { doc_style: Some(Inner) }, len: 18 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: BlockComment { doc_style: None, terminated: true }, len: 4 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: BlockComment { doc_style: None, terminated: true }, len: 18 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: BlockComment { doc_style: Some(Outer), terminated: true }, len: 22 }
-        Token { kind: Whitespace, len: 1 }
-        Token { kind: BlockComment { doc_style: Some(Inner), terminated: true }, len: 22 }
-        Token { kind: Whitespace, len: 1 }
-        "#]],
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: LineComment { doc_style: None }, len: 7 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: LineComment { doc_style: None }, len: 17 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: LineComment { doc_style: Some(Outer) }, len: 18 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: LineComment { doc_style: Some(Inner) }, len: 18 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 18 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: Some(Outer), terminated: true }, len: 22 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: Some(Inner), terminated: true }, len: 22 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
+    )
+}
+
+#[test]
+fn nested_block_comments() {
+    check_lexing(
+        "/* /* */ */'a'",
+        expect![[r#"
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
+        "#]],
+    )
+}
+
+#[test]
+fn characters() {
+    check_lexing(
+        "'a' ' ' '\\n'",
+        expect![[r#"
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 4 }, len: 4 }
+        "#]],
+    );
+}
+
+#[test]
+fn lifetime() {
+    check_lexing(
+        "'abc",
+        expect![[r#"
+            Token { kind: Lifetime { starts_with_number: false }, len: 4 }
+        "#]],
+    );
+}
+
+#[test]
+fn raw_string() {
+    check_lexing(
+        "r###\"\"#a\\b\x00c\"\"###",
+        expect![[r#"
+            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 17 }, len: 17 }
+        "#]],
+    )
+}
+
+#[test]
+fn literal_suffixes() {
+    check_lexing(
+        r####"
+'a'
+b'a'
+"a"
+b"a"
+1234
+0b101
+0xABC
+1.0
+1.0e10
+2us
+r###"raw"###suffix
+br###"raw"###suffix
+"####,
+        expect![[r#"
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Byte { terminated: true }, suffix_start: 4 }, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: ByteStr { terminated: true }, suffix_start: 4 }, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 4 }, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Int { base: Binary, empty_int: false }, suffix_start: 5 }, len: 5 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Int { base: Hexadecimal, empty_int: false }, suffix_start: 5 }, len: 5 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 6 }, len: 6 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 12 }, len: 18 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: RawByteStr { n_hashes: 3, err: None }, suffix_start: 13 }, len: 19 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
     )
 }