diff options
| author | Huon Wilson <dbau.pp+github@gmail.com> | 2014-02-23 16:40:04 +1100 |
|---|---|---|
| committer | Alex Crichton <alex@alexcrichton.com> | 2014-02-24 21:22:26 -0800 |
| commit | ff79a4471cbf5fa4e78fcf56be129a3d56690127 (patch) | |
| tree | 6835775de94115c4f499981198c8b618330dd2fd /src | |
| parent | dad52cfcb58cb30170c6247f2053bc0f0d57466a (diff) | |
| download | rust-ff79a4471cbf5fa4e78fcf56be129a3d56690127.tar.gz rust-ff79a4471cbf5fa4e78fcf56be129a3d56690127.zip | |
syntax: record multibyte chars' positions absolutely, not relative to file.
Previously multibyte UTF-8 chars were being recorded as byte offsets from the start of the file, and then later compared against global byte positions, resulting in the compiler possibly thinking it had a byte position pointing inside a multibyte character, if there were multibyte characters in any non-crate files. (Although, sometimes the byte offsets line up just right to not ICE, but that was a coincidence.) Fixes #11136. Fixes #11178.
Diffstat (limited to 'src')
| -rw-r--r-- | src/libsyntax/parse/lexer.rs | 3 | ||||
| -rw-r--r-- | src/test/run-make/unicode-input/Makefile | 6 | ||||
| -rw-r--r-- | src/test/run-make/unicode-input/multiple_files.rs | 54 |
3 files changed, 61 insertions, 2 deletions
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index b711e95bc94..5bace75a5ea 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -264,8 +264,7 @@ pub fn bump(rdr: &StringReader) { } if byte_offset_diff > 1 { - rdr.filemap.record_multibyte_char( - Pos::from_uint(current_byte_offset), byte_offset_diff); + rdr.filemap.record_multibyte_char(rdr.last_pos.get(), byte_offset_diff); } } else { rdr.curr.set(None); diff --git a/src/test/run-make/unicode-input/Makefile b/src/test/run-make/unicode-input/Makefile new file mode 100644 index 00000000000..1e420bddb77 --- /dev/null +++ b/src/test/run-make/unicode-input/Makefile @@ -0,0 +1,6 @@ +-include ../tools.mk + +all: + # check that we don't ICE on unicode input, issue #11178 + $(RUSTC) multiple_files.rs + $(call RUN,multiple_files) "$(RUSTC)" "$(TMPDIR)" diff --git a/src/test/run-make/unicode-input/multiple_files.rs b/src/test/run-make/unicode-input/multiple_files.rs new file mode 100644 index 00000000000..2758ac12bab --- /dev/null +++ b/src/test/run-make/unicode-input/multiple_files.rs @@ -0,0 +1,54 @@ +use std::{char, os, run, str}; +use std::rand::{task_rng, Rng}; +use std::io::File; + +// creates unicode_input_multiple_files_{main,chars}.rs, where the +// former imports the latter. `_chars` just contains an identifier +// made up of random characters, because rustc will emit an error message +// about the ident being in the wrong place, with a span (and creating +// this span used to upset the compiler).
+ +fn random_char() -> char { + let mut rng = task_rng(); + // a subset of the XID_start unicode table (ensuring that the + // compiler doesn't fail with an "unrecognised token" error) + let (lo, hi): (u32, u32) = match rng.gen_range(1, 4 + 1) { + 1 => (0x41, 0x5a), + 2 => (0xf8, 0x1ba), + 3 => (0x1401, 0x166c), + _ => (0x10400, 0x1044f) + }; + + char::from_u32(rng.gen_range(lo, hi + 1)).unwrap() +} + +fn main() { + let args = os::args(); + let rustc = args[1].as_slice(); + let tmpdir = Path::new(args[2].as_slice()); + + let main_file = tmpdir.join("unicode_input_multiple_files_main.rs"); + let main_file_str = main_file.as_str().unwrap(); + { + let _ = File::create(&main_file).unwrap() + .write_str("mod unicode_input_multiple_files_chars;"); + } + + for _ in range(0, 100) { + { + let mut w = File::create(&tmpdir.join("unicode_input_multiple_files_chars.rs")).unwrap(); + for _ in range(0, 30) { + let _ = w.write_char(random_char()); + } + } + + // rustc is passed to us with --out-dir and -L etc., so we + // can't exec it directly + let result = run::process_output("sh", [~"-c", rustc + " " + main_file_str]).unwrap(); + let err = str::from_utf8_lossy(result.error); + + // positive test so that this test will be updated when the + // compiler changes. + assert!(err.as_slice().contains("expected item but found")) + } +} |
