about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Liigo Zhuang <com.liigo@gmail.com> 2014-03-18 08:59:44 +0800
committer: Alex Crichton <alex@alexcrichton.com> 2014-03-18 13:49:11 -0700
commit: 20e178c5821b32d7a7deab70af90bf50f9d39df3 (patch)
tree: 4b3a240a0c3f51d46fcf0bc21b1a934b44905e8a
parent: 8f7a7970f3f01aa2266c6ce3ee031234b86b1b9b (diff)
download: rust-20e178c5821b32d7a7deab70af90bf50f9d39df3.tar.gz
          rust-20e178c5821b32d7a7deab70af90bf50f9d39df3.zip
libsyntax: librustdoc: ignore utf-8 BOM in .rs files
Closes #12974
-rw-r--r-- src/librustdoc/html/render.rs 9
-rw-r--r-- src/libsyntax/codemap.rs 11
-rw-r--r-- src/test/run-pass/utf8-bom.rs 13
3 files changed, 31 insertions, 2 deletions
diff --git a/src/librustdoc/html/render.rs b/src/librustdoc/html/render.rs
index 1ebb51cb65e..6ad7b7d9da1 100644
--- a/src/librustdoc/html/render.rs
+++ b/src/librustdoc/html/render.rs
@@ -463,6 +463,13 @@ impl<'a> SourceCollector<'a> {
         };
         let contents = str::from_utf8_owned(contents).unwrap();
 
+        // Remove the utf-8 BOM if any
+        let contents = if contents.starts_with("\ufeff") {
+            contents.as_slice().slice_from(3)
+        } else {
+            contents.as_slice()
+        };
+
         // Create the intermediate directories
         let mut cur = self.dst.clone();
         let mut root_path = ~"../../";
@@ -482,7 +489,7 @@ impl<'a> SourceCollector<'a> {
             root_path: root_path,
         };
         try!(layout::render(&mut w as &mut Writer, &self.cx.layout,
-                              &page, &(""), &Source(contents.as_slice())));
+                              &page, &(""), &Source(contents)));
         try!(w.flush());
         return Ok(());
     }
diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs
index 4bfd5391a8f..d93b5803eac 100644
--- a/src/libsyntax/codemap.rs
+++ b/src/libsyntax/codemap.rs
@@ -271,13 +271,22 @@ impl CodeMap {
         }
     }
 
-    pub fn new_filemap(&self, filename: FileName, mut src: ~str) -> Rc<FileMap> {
+    pub fn new_filemap(&self, filename: FileName, src: ~str) -> Rc<FileMap> {
         let mut files = self.files.borrow_mut();
         let start_pos = match files.get().last() {
             None => 0,
             Some(last) => last.deref().start_pos.to_uint() + last.deref().src.len(),
         };
 
+        // Remove utf-8 BOM if any.
+        // FIXME #12884: no efficient/safe way to remove from the start of a string
+        // and reuse the allocation.
+        let mut src = if src.starts_with("\ufeff") {
+            src.as_slice().slice_from(3).into_owned()
+        } else {
+            src
+        };
+
         // Append '\n' in case it's not already there.
         // This is a workaround to prevent CodeMap.lookup_filemap_idx from accidentally
         // overflowing into the next filemap in case the last byte of span is also the last
diff --git a/src/test/run-pass/utf8-bom.rs b/src/test/run-pass/utf8-bom.rs
new file mode 100644
index 00000000000..ccd40cb88fe
--- /dev/null
+++ b/src/test/run-pass/utf8-bom.rs
@@ -0,0 +1,13 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// This file has utf-8 BOM, it should be compiled normally without error.
+
+pub fn main() {}