about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMazdak Farrokhzad <twingoow@gmail.com>2019-08-16 18:22:24 +0200
committerGitHub <noreply@github.com>2019-08-16 18:22:24 +0200
commitc83d3c32815c13d9fa2e881ed97b7163030cd8cc (patch)
treeb427373656728960e894f6ea9fca4d67a88785fb
parentdb3bae01708b41f7fd9bb75830cd9163b8b10e48 (diff)
parent14bc998df9f15042342ac8e649a4adadf17a65f8 (diff)
downloadrust-c83d3c32815c13d9fa2e881ed97b7163030cd8cc.tar.gz
rust-c83d3c32815c13d9fa2e881ed97b7163030cd8cc.zip
Rollup merge of #63525 - matklad:centraliza-file-loading, r=petrochenkov
Make sure that all file loading happens via SourceMap

That way, callers don't need to repeat the "let's add this to the source
map manually for tracking dependencies" trick.

It should make it easier to switch to using `FileLoader` for binary
files in the future as well.

cc #62948

r? @petrochenkov
-rw-r--r--src/libsyntax/ext/expand.rs13
-rw-r--r--src/libsyntax/source_map.rs20
-rw-r--r--src/libsyntax_ext/source_util.rs37
-rw-r--r--src/test/ui/.gitattributes1
-rw-r--r--src/test/ui/include-macros/data.bin2
-rw-r--r--src/test/ui/include-macros/normalization.rs12
6 files changed, 51 insertions, 34 deletions
diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs
index e7deadbc9a0..dac402921b9 100644
--- a/src/libsyntax/ext/expand.rs
+++ b/src/libsyntax/ext/expand.rs
@@ -25,7 +25,6 @@ use syntax_pos::{Span, DUMMY_SP, FileName};
 
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::Lrc;
-use std::fs;
 use std::io::ErrorKind;
 use std::{iter, mem};
 use std::ops::DerefMut;
@@ -1241,13 +1240,11 @@ impl<'a, 'b> MutVisitor for InvocationCollector<'a, 'b> {
                     }
 
                     let filename = self.cx.resolve_path(&*file.as_str(), it.span());
-                    match fs::read_to_string(&filename) {
-                        Ok(src) => {
-                            let src_interned = Symbol::intern(&src);
-
-                            // Add this input file to the code map to make it available as
-                            // dependency information
-                            self.cx.source_map().new_source_file(filename.into(), src);
+                    match self.cx.source_map().load_file(&filename) {
+                        Ok(source_file) => {
+                            let src = source_file.src.as_ref()
+                                .expect("freshly loaded file should have a source");
+                            let src_interned = Symbol::intern(src.as_str());
 
                             let include_info = vec![
                                 ast::NestedMetaItem::MetaItem(
diff --git a/src/libsyntax/source_map.rs b/src/libsyntax/source_map.rs
index 940687cb5d4..7190cfd72a9 100644
--- a/src/libsyntax/source_map.rs
+++ b/src/libsyntax/source_map.rs
@@ -171,6 +171,26 @@ impl SourceMap {
         Ok(self.new_source_file(filename, src))
     }
 
+    /// Loads source file as a binary blob.
+    ///
+    /// Unlike `load_file`, guarantees that no normalization like BOM-removal
+    /// takes place.
+    pub fn load_binary_file(&self, path: &Path) -> io::Result<Vec<u8>> {
+        // Ideally, this should use `self.file_loader`, but it can't
+        // deal with binary files yet.
+        let bytes = fs::read(path)?;
+
+        // We need to add file to the `SourceMap`, so that it is present
+        // in dep-info. There's also an edge case that file might be both
+        // loaded as a binary via `include_bytes!` and as proper `SourceFile`
+        // via `mod`, so we try to use real file contents and not just an
+        // empty string.
+        let text = std::str::from_utf8(&bytes).unwrap_or("")
+            .to_string();
+        self.new_source_file(path.to_owned().into(), text);
+        Ok(bytes)
+    }
+
     pub fn files(&self) -> MappedLockGuard<'_, Vec<Lrc<SourceFile>>> {
         LockGuard::map(self.files.borrow(), |files| &mut files.source_files)
     }
diff --git a/src/libsyntax_ext/source_util.rs b/src/libsyntax_ext/source_util.rs
index cbc01b48afd..e008ed710e4 100644
--- a/src/libsyntax_ext/source_util.rs
+++ b/src/libsyntax_ext/source_util.rs
@@ -9,8 +9,6 @@ use syntax::tokenstream;
 use smallvec::SmallVec;
 use syntax_pos::{self, Pos, Span};
 
-use std::fs;
-use std::io::ErrorKind;
 use rustc_data_structures::sync::Lrc;
 
 // These macros all relate to the file system; they either return
@@ -114,20 +112,17 @@ pub fn expand_include_str(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::To
         None => return DummyResult::any(sp)
     };
     let file = cx.resolve_path(file, sp);
-    match fs::read_to_string(&file) {
-        Ok(src) => {
-            let interned_src = Symbol::intern(&src);
-
-            // Add this input file to the code map to make it available as
-            // dependency information
-            cx.source_map().new_source_file(file.into(), src);
-
-            base::MacEager::expr(cx.expr_str(sp, interned_src))
+    match cx.source_map().load_binary_file(&file) {
+        Ok(bytes) => match std::str::from_utf8(&bytes) {
+            Ok(src) => {
+                let interned_src = Symbol::intern(&src);
+                base::MacEager::expr(cx.expr_str(sp, interned_src))
+            }
+            Err(_) => {
+                cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
+                DummyResult::any(sp)
+            }
         },
-        Err(ref e) if e.kind() == ErrorKind::InvalidData => {
-            cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
-            DummyResult::any(sp)
-        }
         Err(e) => {
             cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e));
             DummyResult::any(sp)
@@ -142,18 +137,8 @@ pub fn expand_include_bytes(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::
         None => return DummyResult::any(sp)
     };
     let file = cx.resolve_path(file, sp);
-    match fs::read(&file) {
+    match cx.source_map().load_binary_file(&file) {
         Ok(bytes) => {
-            // Add the contents to the source map if it contains UTF-8.
-            let (contents, bytes) = match String::from_utf8(bytes) {
-                Ok(s) => {
-                    let bytes = s.as_bytes().to_owned();
-                    (s, bytes)
-                },
-                Err(e) => (String::new(), e.into_bytes()),
-            };
-            cx.source_map().new_source_file(file.into(), contents);
-
             base::MacEager::expr(cx.expr_lit(sp, ast::LitKind::ByteStr(Lrc::new(bytes))))
         },
         Err(e) => {
diff --git a/src/test/ui/.gitattributes b/src/test/ui/.gitattributes
index b62ade73aa9..489dc8ad111 100644
--- a/src/test/ui/.gitattributes
+++ b/src/test/ui/.gitattributes
@@ -1,2 +1,3 @@
 lexer-crlf-line-endings-string-literal-doc-comment.rs -text
 trailing-carriage-return-in-string.rs -text
+*.bin -text
diff --git a/src/test/ui/include-macros/data.bin b/src/test/ui/include-macros/data.bin
new file mode 100644
index 00000000000..ce4e0b8311a
--- /dev/null
+++ b/src/test/ui/include-macros/data.bin
@@ -0,0 +1,2 @@
+This file starts with BOM.

+Lines are separated by \r\n.

diff --git a/src/test/ui/include-macros/normalization.rs b/src/test/ui/include-macros/normalization.rs
new file mode 100644
index 00000000000..889f08e606e
--- /dev/null
+++ b/src/test/ui/include-macros/normalization.rs
@@ -0,0 +1,12 @@
+// run-pass
+
+fn main() {
+    assert_eq!(
+        &include_bytes!("data.bin")[..],
+        &b"\xEF\xBB\xBFThis file starts with BOM.\r\nLines are separated by \\r\\n.\r\n"[..],
+    );
+    assert_eq!(
+        include_str!("data.bin"),
+        "\u{FEFF}This file starts with BOM.\r\nLines are separated by \\r\\n.\r\n",
+    );
+}