about summary refs log tree commit diff
path: root/src/libsyntax
diff options
context:
space:
mode:
authorMazdak Farrokhzad <twingoow@gmail.com>2019-10-25 13:12:45 +0200
committerGitHub <noreply@github.com>2019-10-25 13:12:45 +0200
commit1f93be1bb3f89d6b30a3ddc39e8a462924ccd503 (patch)
treeddc93b333040abfda9dd252818e026760f8e0fa3 /src/libsyntax
parent959b6e324ce2786a4adade6cef222ffbd20f3791 (diff)
parentff1860ad763baac652d3a43a93985e29ade805cb (diff)
downloadrust-1f93be1bb3f89d6b30a3ddc39e8a462924ccd503.tar.gz
rust-1f93be1bb3f89d6b30a3ddc39e8a462924ccd503.zip
Rollup merge of #65074 - Rantanen:json-byte-pos, r=matklad
Fix the start/end byte positions in the compiler JSON output

Track the changes made during normalization in the `SourceFile` and use this information to correct the `start_byte` and `end_byte` fields in the JSON output.

This should ensure the start/end byte fields can be used to index the original file, even if Rust normalized the source code for parsing purposes. Both CRLF to LF and BOM removal are handled with this one.

The rough plan was discussed with @matklad in rust-lang-nursery/rustfix#176 - although I ended up going with `u32` offset tracking so I wouldn't need to deal with `u32 + i32` arithmetics when applying the offset to the span byte positions.

Fixes #65029
Diffstat (limited to 'src/libsyntax')
-rw-r--r--src/libsyntax/json.rs7
-rw-r--r--src/libsyntax/json/tests.rs186
-rw-r--r--src/libsyntax/source_map.rs6
-rw-r--r--src/libsyntax/tests.rs4
4 files changed, 199 insertions, 4 deletions
diff --git a/src/libsyntax/json.rs b/src/libsyntax/json.rs
index e3296788d9f..0b157938375 100644
--- a/src/libsyntax/json.rs
+++ b/src/libsyntax/json.rs
@@ -25,6 +25,9 @@ use std::sync::{Arc, Mutex};
 
 use rustc_serialize::json::{as_json, as_pretty_json};
 
+#[cfg(test)]
+mod tests;
+
 pub struct JsonEmitter {
     dst: Box<dyn Write + Send>,
     registry: Option<Registry>,
@@ -336,8 +339,8 @@ impl DiagnosticSpan {
 
         DiagnosticSpan {
             file_name: start.file.name.to_string(),
-            byte_start: span.lo().0 - start.file.start_pos.0,
-            byte_end: span.hi().0 - start.file.start_pos.0,
+            byte_start: start.file.original_relative_byte_pos(span.lo()).0,
+            byte_end: start.file.original_relative_byte_pos(span.hi()).0,
             line_start: start.line,
             line_end: end.line,
             column_start: start.col.0 + 1,
diff --git a/src/libsyntax/json/tests.rs b/src/libsyntax/json/tests.rs
new file mode 100644
index 00000000000..eb0d9ef3947
--- /dev/null
+++ b/src/libsyntax/json/tests.rs
@@ -0,0 +1,186 @@
+use super::*;
+
+use crate::json::JsonEmitter;
+use crate::source_map::{FilePathMapping, SourceMap};
+use crate::tests::Shared;
+use crate::with_default_globals;
+
+use errors::emitter::{ColorConfig, HumanReadableErrorType};
+use errors::Handler;
+use rustc_serialize::json::decode;
+use syntax_pos::{BytePos, Span};
+
+use std::str;
+
+#[derive(RustcDecodable, Debug, PartialEq, Eq)]
+struct TestData {
+    spans: Vec<SpanTestData>,
+}
+
+#[derive(RustcDecodable, Debug, PartialEq, Eq)]
+struct SpanTestData {
+    pub byte_start: u32,
+    pub byte_end: u32,
+    pub line_start: u32,
+    pub column_start: u32,
+    pub line_end: u32,
+    pub column_end: u32,
+}
+
+/// Test the span yields correct positions in JSON.
+fn test_positions(code: &str, span: (u32, u32), expected_output: SpanTestData) {
+    let expected_output = TestData { spans: vec![expected_output] };
+
+    with_default_globals(|| {
+        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
+        sm.new_source_file(Path::new("test.rs").to_owned().into(), code.to_owned());
+
+        let output = Arc::new(Mutex::new(Vec::new()));
+        let je = JsonEmitter::new(
+            Box::new(Shared { data: output.clone() }),
+            None,
+            sm,
+            true,
+            HumanReadableErrorType::Short(ColorConfig::Never),
+            false,
+        );
+
+        let span = Span::with_root_ctxt(BytePos(span.0), BytePos(span.1));
+        let handler = Handler::with_emitter(true, None, Box::new(je));
+        handler.span_err(span, "foo");
+
+        let bytes = output.lock().unwrap();
+        let actual_output = str::from_utf8(&bytes).unwrap();
+        let actual_output: TestData = decode(actual_output).unwrap();
+
+        assert_eq!(expected_output, actual_output)
+    })
+}
+
+#[test]
+fn empty() {
+    test_positions(
+        " ",
+        (0, 1),
+        SpanTestData {
+            byte_start: 0,
+            byte_end: 1,
+            line_start: 1,
+            column_start: 1,
+            line_end: 1,
+            column_end: 2,
+        },
+    )
+}
+
+#[test]
+fn bom() {
+    test_positions(
+        "\u{feff} ",
+        (0, 1),
+        SpanTestData {
+            byte_start: 3,
+            byte_end: 4,
+            line_start: 1,
+            column_start: 1,
+            line_end: 1,
+            column_end: 2,
+        },
+    )
+}
+
+#[test]
+fn lf_newlines() {
+    test_positions(
+        "\nmod foo;\nmod bar;\n",
+        (5, 12),
+        SpanTestData {
+            byte_start: 5,
+            byte_end: 12,
+            line_start: 2,
+            column_start: 5,
+            line_end: 3,
+            column_end: 3,
+        },
+    )
+}
+
+#[test]
+fn crlf_newlines() {
+    test_positions(
+        "\r\nmod foo;\r\nmod bar;\r\n",
+        (5, 12),
+        SpanTestData {
+            byte_start: 6,
+            byte_end: 14,
+            line_start: 2,
+            column_start: 5,
+            line_end: 3,
+            column_end: 3,
+        },
+    )
+}
+
+#[test]
+fn crlf_newlines_with_bom() {
+    test_positions(
+        "\u{feff}\r\nmod foo;\r\nmod bar;\r\n",
+        (5, 12),
+        SpanTestData {
+            byte_start: 9,
+            byte_end: 17,
+            line_start: 2,
+            column_start: 5,
+            line_end: 3,
+            column_end: 3,
+        },
+    )
+}
+
+#[test]
+fn span_before_crlf() {
+    test_positions(
+        "foo\r\nbar",
+        (2, 3),
+        SpanTestData {
+            byte_start: 2,
+            byte_end: 3,
+            line_start: 1,
+            column_start: 3,
+            line_end: 1,
+            column_end: 4,
+        },
+    )
+}
+
+#[test]
+fn span_on_crlf() {
+    test_positions(
+        "foo\r\nbar",
+        (3, 4),
+        SpanTestData {
+            byte_start: 3,
+            byte_end: 5,
+            line_start: 1,
+            column_start: 4,
+            line_end: 2,
+            column_end: 1,
+        },
+    )
+}
+
+#[test]
+fn span_after_crlf() {
+    test_positions(
+        "foo\r\nbar",
+        (4, 5),
+        SpanTestData {
+            byte_start: 5,
+            byte_end: 6,
+            line_start: 2,
+            column_start: 1,
+            line_end: 2,
+            column_end: 2,
+        },
+    )
+}
diff --git a/src/libsyntax/source_map.rs b/src/libsyntax/source_map.rs
index a1d147637e2..d7760e0cf9e 100644
--- a/src/libsyntax/source_map.rs
+++ b/src/libsyntax/source_map.rs
@@ -283,6 +283,7 @@ impl SourceMap {
         mut file_local_lines: Vec<BytePos>,
         mut file_local_multibyte_chars: Vec<MultiByteChar>,
         mut file_local_non_narrow_chars: Vec<NonNarrowChar>,
+        mut file_local_normalized_pos: Vec<NormalizedPos>,
     ) -> Lrc<SourceFile> {
         let start_pos = self.next_start_pos();
 
@@ -301,6 +302,10 @@ impl SourceMap {
             *swc = *swc + start_pos;
         }
 
+        for nc in &mut file_local_normalized_pos {
+            nc.pos = nc.pos + start_pos;
+        }
+
         let source_file = Lrc::new(SourceFile {
             name: filename,
             name_was_remapped,
@@ -314,6 +319,7 @@ impl SourceMap {
             lines: file_local_lines,
             multibyte_chars: file_local_multibyte_chars,
             non_narrow_chars: file_local_non_narrow_chars,
+            normalized_pos: file_local_normalized_pos,
             name_hash,
         });
 
diff --git a/src/libsyntax/tests.rs b/src/libsyntax/tests.rs
index 881bdaa84d0..e73c8b43bcc 100644
--- a/src/libsyntax/tests.rs
+++ b/src/libsyntax/tests.rs
@@ -111,8 +111,8 @@ struct SpanLabel {
     label: &'static str,
 }
 
-struct Shared<T: Write> {
-    data: Arc<Mutex<T>>,
+crate struct Shared<T: Write> {
+    pub data: Arc<Mutex<T>>,
 }
 
 impl<T: Write> Write for Shared<T> {